ML for Gas Adsorption¶
Import packages we will need¶
# basics
import os
import numpy as np
import pprint as pp
# pandas is used to read/process data
import pandas as pd
# machine learning dependencies
# scaling of data
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
# train/test split
from sklearn.model_selection import train_test_split
# model selection
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
# the KRR model
from sklearn.kernel_ridge import KernelRidge
# linear model
from sklearn.linear_model import LinearRegression
# pipeline to streamline modeling pipelines
from sklearn.pipeline import Pipeline
# principal component analysis
from sklearn.decomposition import PCA
# polynomial kernel
from sklearn.metrics.pairwise import polynomial_kernel
# Dummy model as baseline
from sklearn.dummy import DummyClassifier, DummyRegressor
# Variance Threshold for feature selection
from sklearn.feature_selection import VarianceThreshold, SelectFromModel
# metrics to measure model performance
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
mean_absolute_error, mean_squared_error, max_error)
# save/load models
import joblib
# For the permutation importance implementation
from joblib import Parallel
from joblib import delayed
from sklearn.metrics import check_scoring
from sklearn.utils import Bunch
from sklearn.utils import check_random_state
from sklearn.utils import check_array
# plotting
import matplotlib.pyplot as plt
%matplotlib inline
from pymatviz.parity import hist_density
# Global seed for reproducibility: after np.random.seed() below, every call
# into np.random draws from the same deterministic pseudo-random sequence.
RANDOM_SEED = 4242424242
# Input data location: data/data.csv, relative to the working directory.
DATA_DIR = 'data'
DATA_FILE = os.path.join(DATA_DIR, 'data.csv')
# Seed NumPy's global RNG once, up front, before any stochastic step runs.
np.random.seed(RANDOM_SEED)
# Unit-cell volume, kept separate from the main geometric feature set.
other_descriptors = ["CellV [A^3]"]
# Pore-geometry descriptors: Di, Df and Dif are pore-size measures (largest
# included / free / included-free sphere along the pore), plus framework
# density, pore surface area (SA) and probe-occupiable pore volume (POV),
# each given per volume (volumetric) and per mass (gravimetric).
geometric_descriptors = [
"Di",
"Df",
"Dif",
"density [g/cm^3]",
"total_SA_volumetric",
"total_SA_gravimetric",
"total_POV_volumetric",
"total_POV_gravimetric",
]
# Linker RAC (revised autocorrelation) descriptor names, generated rather than
# enumerated. Properties: electronegativity (chi), nuclear charge (Z),
# identity (I), connectivity (T), covalent radius (S); the "lc"/"D_lc" sets
# additionally include alpha. Depths 0-3 index the autocorrelation shells;
# the "D_" prefix marks the difference variant of each descriptor.
linker_descriptors = (
    # full-linker descriptors: f-lig-<property>-<depth>
    [f"f-lig-{prop}-{depth}"
     for prop in ("chi", "Z", "I", "T", "S")
     for depth in range(4)]
    # linker-connecting-atom descriptors and their "D_" counterparts:
    # <scope>-<property>-<depth>-all
    + [f"{scope}-{prop}-{depth}-all"
       for scope in ("lc", "D_lc")
       for prop in ("chi", "Z", "I", "T", "S", "alpha")
       for depth in range(4)]
)
# Metal-center RAC descriptor names: mc_CRY-<property>-<depth>-all plus the
# "D_" difference variants, for properties chi, Z, I, T, S at depths 0-3.
metalcenter_descriptors = [
    f"{scope}-{prop}-{depth}-all"
    for scope in ("mc_CRY", "D_mc_CRY")
    for prop in ("chi", "Z", "I", "T", "S")
    for depth in range(4)
]
# Functional-group RAC descriptor names: func-<property>-<depth>-all plus the
# "D_" difference variants, for chi, Z, I, T, S and alpha at depths 0-3.
functionalgroup_descriptors = [
    f"{scope}-{prop}-{depth}-all"
    for scope in ("func", "D_func")
    for prop in ("chi", "Z", "I", "T", "S", "alpha")
    for depth in range(4)
]
# Sum-aggregated linker RACs: the same names as linker_descriptors, each with
# a "sum-" prefix, generated in the identical order.
summed_linker_descriptors = (
    [f"sum-f-lig-{prop}-{depth}"
     for prop in ("chi", "Z", "I", "T", "S")
     for depth in range(4)]
    + [f"sum-{scope}-{prop}-{depth}-all"
       for scope in ("lc", "D_lc")
       for prop in ("chi", "Z", "I", "T", "S", "alpha")
       for depth in range(4)]
)
# Sum-aggregated metal-center RACs: "sum-"-prefixed versions of the
# mc_CRY / D_mc_CRY names, in the identical order.
summed_metalcenter_descriptors = [
    f"sum-{scope}-{prop}-{depth}-all"
    for scope in ("mc_CRY", "D_mc_CRY")
    for prop in ("chi", "Z", "I", "T", "S")
    for depth in range(4)
]
# Sum-aggregated functional-group RACs: "sum-"-prefixed versions of the
# func / D_func names, in the identical order.
summed_functionalgroup_descriptors = [
    f"sum-{scope}-{prop}-{depth}-all"
    for scope in ("func", "D_func")
    for prop in ("chi", "Z", "I", "T", "S", "alpha")
    for depth in range(4)
]
$\color{DarkBlue}{\textsf{Short question}}$
- We declared a global variable to fix the random seed (
RANDOM_SEED). Why did we do this?
Answer: So that every run produces the same results. Seeding np.random fixes the sequence of pseudo-random numbers it generates, so any step that relies on randomness (e.g. the train/test split) is reproducible.
Hands-on Project: Carbon-dioxide uptake in MOFs¶
In this exercise we will build a model that can predict the CO$_2$ uptake of metal-organic frameworks (MOFs), which are crystalline materials consisting of inorganic metal nodes linked by organic linkers.

There are two main learning goals for this exercise:
Understand the typical workflow for machine learning in materials science. We will cover exploratory data analysis (EDA) and supervised learning (KRR).
Get familiar with some Python packages that are useful for data analysis and visualization.
At the end of the exercise, you will produce an interactive plot like the one below, comparing the predictions of your model against CO$_2$ computed with GCMC simulations. The histograms show the distributions of the errors on the training set (left) and on the test set (right).

This exercise requires a basic knowledge of Python, e.g. that you can write list comprehensions, and are able to read documentation of functions provided by Python packages.
You will be asked to provide some function arguments (indicated by #fillme comments).
You can execute all the following code cells by pressing SHIFT and ENTER and get information about the functions by pressing TAB when you are between the parentheses (see the notes for more tips).
Also the sklearn documentation is a great source of reference with many explanations and examples.
In pandas dataframe (df) you can select columns using their name by running df[columnname]. If at any point you think that the dataset is too large for your computer, you can select a subset using df.sample() or by making the test set larger in the train/test split (section 2).
1. Import the data¶
# Load the full MOF dataset (descriptors + GCMC-computed targets) into a dataframe.
df = pd.read_csv(DATA_FILE)
Let's take a look at the first few rows to see if everything seems reasonable ...
# Raise pandas' column-display limit to the dataframe's full width so that
# head() below prints every one of the columns instead of truncating.
pd.options.display.max_columns=df.shape[1]
df.head()
| ASA [m^2/cm^3] | CellV [A^3] | Df | Di | Dif | NASA [m^2/cm^3] | POAV [cm^3/g] | POAVF | PONAV [cm^3/g] | PONAVF | density [g/cm^3] | MOFname | total_SA_volumetric | total_SA_gravimetric | total_POV_volumetric | total_POV_gravimetric | mc_CRY-chi-0-all | mc_CRY-chi-1-all | mc_CRY-chi-2-all | mc_CRY-chi-3-all | mc_CRY-Z-0-all | mc_CRY-Z-1-all | mc_CRY-Z-2-all | mc_CRY-Z-3-all | mc_CRY-I-0-all | mc_CRY-I-1-all | mc_CRY-I-2-all | mc_CRY-I-3-all | mc_CRY-T-0-all | mc_CRY-T-1-all | mc_CRY-T-2-all | mc_CRY-T-3-all | mc_CRY-S-0-all | mc_CRY-S-1-all | mc_CRY-S-2-all | mc_CRY-S-3-all | D_mc_CRY-chi-0-all | D_mc_CRY-chi-1-all | D_mc_CRY-chi-2-all | D_mc_CRY-chi-3-all | D_mc_CRY-Z-0-all | D_mc_CRY-Z-1-all | D_mc_CRY-Z-2-all | D_mc_CRY-Z-3-all | D_mc_CRY-I-0-all | D_mc_CRY-I-1-all | D_mc_CRY-I-2-all | D_mc_CRY-I-3-all | D_mc_CRY-T-0-all | D_mc_CRY-T-1-all | D_mc_CRY-T-2-all | D_mc_CRY-T-3-all | D_mc_CRY-S-0-all | D_mc_CRY-S-1-all | D_mc_CRY-S-2-all | D_mc_CRY-S-3-all | sum-mc_CRY-chi-0-all | sum-mc_CRY-chi-1-all | sum-mc_CRY-chi-2-all | sum-mc_CRY-chi-3-all | sum-mc_CRY-Z-0-all | sum-mc_CRY-Z-1-all | sum-mc_CRY-Z-2-all | sum-mc_CRY-Z-3-all | sum-mc_CRY-I-0-all | sum-mc_CRY-I-1-all | sum-mc_CRY-I-2-all | sum-mc_CRY-I-3-all | sum-mc_CRY-T-0-all | sum-mc_CRY-T-1-all | sum-mc_CRY-T-2-all | sum-mc_CRY-T-3-all | sum-mc_CRY-S-0-all | sum-mc_CRY-S-1-all | sum-mc_CRY-S-2-all | sum-mc_CRY-S-3-all | sum-D_mc_CRY-chi-0-all | sum-D_mc_CRY-chi-1-all | sum-D_mc_CRY-chi-2-all | sum-D_mc_CRY-chi-3-all | sum-D_mc_CRY-Z-0-all | sum-D_mc_CRY-Z-1-all | sum-D_mc_CRY-Z-2-all | sum-D_mc_CRY-Z-3-all | sum-D_mc_CRY-I-0-all | sum-D_mc_CRY-I-1-all | sum-D_mc_CRY-I-2-all | sum-D_mc_CRY-I-3-all | sum-D_mc_CRY-T-0-all | sum-D_mc_CRY-T-1-all | sum-D_mc_CRY-T-2-all | sum-D_mc_CRY-T-3-all | sum-D_mc_CRY-S-0-all | sum-D_mc_CRY-S-1-all | sum-D_mc_CRY-S-2-all | sum-D_mc_CRY-S-3-all | lc-chi-0-all | lc-chi-1-all | lc-chi-2-all | lc-chi-3-all | lc-Z-0-all | lc-Z-1-all | lc-Z-2-all | lc-Z-3-all | lc-I-0-all | 
lc-I-1-all | lc-I-2-all | lc-I-3-all | lc-T-0-all | lc-T-1-all | lc-T-2-all | lc-T-3-all | lc-S-0-all | lc-S-1-all | lc-S-2-all | lc-S-3-all | lc-alpha-0-all | lc-alpha-1-all | lc-alpha-2-all | lc-alpha-3-all | D_lc-chi-0-all | D_lc-chi-1-all | D_lc-chi-2-all | D_lc-chi-3-all | D_lc-Z-0-all | D_lc-Z-1-all | D_lc-Z-2-all | D_lc-Z-3-all | D_lc-I-0-all | D_lc-I-1-all | D_lc-I-2-all | D_lc-I-3-all | D_lc-T-0-all | D_lc-T-1-all | D_lc-T-2-all | D_lc-T-3-all | D_lc-S-0-all | D_lc-S-1-all | D_lc-S-2-all | D_lc-S-3-all | D_lc-alpha-0-all | D_lc-alpha-1-all | D_lc-alpha-2-all | D_lc-alpha-3-all | func-chi-0-all | func-chi-1-all | func-chi-2-all | func-chi-3-all | func-Z-0-all | func-Z-1-all | func-Z-2-all | func-Z-3-all | func-I-0-all | func-I-1-all | func-I-2-all | func-I-3-all | func-T-0-all | func-T-1-all | func-T-2-all | func-T-3-all | func-S-0-all | func-S-1-all | func-S-2-all | func-S-3-all | func-alpha-0-all | func-alpha-1-all | func-alpha-2-all | func-alpha-3-all | D_func-chi-0-all | D_func-chi-1-all | D_func-chi-2-all | D_func-chi-3-all | D_func-Z-0-all | D_func-Z-1-all | D_func-Z-2-all | D_func-Z-3-all | D_func-I-0-all | D_func-I-1-all | D_func-I-2-all | D_func-I-3-all | D_func-T-0-all | D_func-T-1-all | D_func-T-2-all | D_func-T-3-all | D_func-S-0-all | D_func-S-1-all | D_func-S-2-all | D_func-S-3-all | D_func-alpha-0-all | D_func-alpha-1-all | D_func-alpha-2-all | D_func-alpha-3-all | f-lig-chi-0 | f-lig-chi-1 | f-lig-chi-2 | f-lig-chi-3 | f-lig-Z-0 | f-lig-Z-1 | f-lig-Z-2 | f-lig-Z-3 | f-lig-I-0 | f-lig-I-1 | f-lig-I-2 | f-lig-I-3 | f-lig-T-0 | f-lig-T-1 | f-lig-T-2 | f-lig-T-3 | f-lig-S-0 | f-lig-S-1 | f-lig-S-2 | f-lig-S-3 | sum-lc-chi-0-all | sum-lc-chi-1-all | sum-lc-chi-2-all | sum-lc-chi-3-all | sum-lc-Z-0-all | sum-lc-Z-1-all | sum-lc-Z-2-all | sum-lc-Z-3-all | sum-lc-I-0-all | sum-lc-I-1-all | sum-lc-I-2-all | sum-lc-I-3-all | sum-lc-T-0-all | sum-lc-T-1-all | sum-lc-T-2-all | sum-lc-T-3-all | sum-lc-S-0-all | sum-lc-S-1-all | sum-lc-S-2-all | 
sum-lc-S-3-all | sum-lc-alpha-0-all | sum-lc-alpha-1-all | sum-lc-alpha-2-all | sum-lc-alpha-3-all | sum-D_lc-chi-0-all | sum-D_lc-chi-1-all | sum-D_lc-chi-2-all | sum-D_lc-chi-3-all | sum-D_lc-Z-0-all | sum-D_lc-Z-1-all | sum-D_lc-Z-2-all | sum-D_lc-Z-3-all | sum-D_lc-I-0-all | sum-D_lc-I-1-all | sum-D_lc-I-2-all | sum-D_lc-I-3-all | sum-D_lc-T-0-all | sum-D_lc-T-1-all | sum-D_lc-T-2-all | sum-D_lc-T-3-all | sum-D_lc-S-0-all | sum-D_lc-S-1-all | sum-D_lc-S-2-all | sum-D_lc-S-3-all | sum-D_lc-alpha-0-all | sum-D_lc-alpha-1-all | sum-D_lc-alpha-2-all | sum-D_lc-alpha-3-all | sum-func-chi-0-all | sum-func-chi-1-all | sum-func-chi-2-all | sum-func-chi-3-all | sum-func-Z-0-all | sum-func-Z-1-all | sum-func-Z-2-all | sum-func-Z-3-all | sum-func-I-0-all | sum-func-I-1-all | sum-func-I-2-all | sum-func-I-3-all | sum-func-T-0-all | sum-func-T-1-all | sum-func-T-2-all | sum-func-T-3-all | sum-func-S-0-all | sum-func-S-1-all | sum-func-S-2-all | sum-func-S-3-all | sum-func-alpha-0-all | sum-func-alpha-1-all | sum-func-alpha-2-all | sum-func-alpha-3-all | sum-D_func-chi-0-all | sum-D_func-chi-1-all | sum-D_func-chi-2-all | sum-D_func-chi-3-all | sum-D_func-Z-0-all | sum-D_func-Z-1-all | sum-D_func-Z-2-all | sum-D_func-Z-3-all | sum-D_func-I-0-all | sum-D_func-I-1-all | sum-D_func-I-2-all | sum-D_func-I-3-all | sum-D_func-T-0-all | sum-D_func-T-1-all | sum-D_func-T-2-all | sum-D_func-T-3-all | sum-D_func-S-0-all | sum-D_func-S-1-all | sum-D_func-S-2-all | sum-D_func-S-3-all | sum-D_func-alpha-0-all | sum-D_func-alpha-1-all | sum-D_func-alpha-2-all | sum-D_func-alpha-3-all | sum-f-lig-chi-0 | sum-f-lig-chi-1 | sum-f-lig-chi-2 | sum-f-lig-chi-3 | sum-f-lig-Z-0 | sum-f-lig-Z-1 | sum-f-lig-Z-2 | sum-f-lig-Z-3 | sum-f-lig-I-0 | sum-f-lig-I-1 | sum-f-lig-I-2 | sum-f-lig-I-3 | sum-f-lig-T-0 | sum-f-lig-T-1 | sum-f-lig-T-2 | sum-f-lig-T-3 | sum-f-lig-S-0 | sum-f-lig-S-1 | sum-f-lig-S-2 | sum-f-lig-S-3 | MNC | MPC | pure_CO2_kH | pure_CO2_widomHOA | pure_methane_kH | 
pure_methane_widomHOA | pure_uptake_CO2_298.00_15000 | pure_uptake_CO2_298.00_1600000 | pure_uptake_methane_298.00_580000 | pure_uptake_methane_298.00_6500000 | logKH_CO2 | logKH_CH4 | CH4DC | CH4HPSTP | CH4LPSTP | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2329.01 | 1251.28 | 6.61256 | 8.87694 | 8.48668 | 0.0 | 0.818919 | 0.68874 | 0.0 | 0.0 | 0.841035 | str_m1_o10_o10_pcu_sym.102 | 2329.01 | 2769.218879 | 0.68874 | 0.818919 | 2.7225 | 22.704 | 20.79 | 63.7065 | 900.0 | 960.0 | 3240.0 | 2700.0 | 1.0 | 4.0 | 6.0 | 12.0 | 16.0 | 40.0 | 84.0 | 108.0 | 1.7161 | 3.8252 | 8.1744 | 11.6328 | 0.0 | -7.16 | -2.7 | -18.81 | 0.0 | 88.0 | 72.0 | 270.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 3.0 | 21.0 | 0.0 | 2.32 | 1.62 | 6.84 | 10.89 | 90.816 | 83.16 | 254.826 | 3600.0 | 3840.0 | 12960.0 | 10800.0 | 4.0 | 16.0 | 24.0 | 48.0 | 64.0 | 160.0 | 336.0 | 432.0 | 6.8644 | 15.3008 | 32.6976 | 46.5312 | 0.0 | -28.64 | -10.8 | -75.24 | 0.0 | 352.0 | 288.0 | 1080.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 24.0 | 12.0 | 84.0 | 0.0 | 9.28 | 6.48 | 27.36 | 11.8336 | 8.772 | 20.6056 | 16.540667 | 64.0 | 48.0 | 112.0 | 62.666667 | 1.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 | 4.0 | 4.500000 | 0.5329 | 0.5621 | 1.095 | 0.880867 | 28.09 | 59.89 | 87.98 | 89.746667 | 0.0 | 0.89 | 0.89 | 2.071667 | 0.0 | 2.0 | 2.0 | 8.166667 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -2.0 | -2.0 | -2.500000 | 0.0 | -0.04 | -0.04 | 0.253333 | 0.0 | -6.0 | -6.0 | -6.333333 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 88.418567 | 143.446000 | 210.490400 | 200.998667 | 414.666667 | 656.000000 | 886.000000 | 646.666667 | 11.000000 | 20.000000 | 28.000000 | 28.000000 | 48.000000 | 104.000000 | 108.000000 | 88.000000 | 5.065900 | 9.968933 | 12.533467 | 11.089733 | 35.5008 | 26.316 | 61.8168 | 49.6220 | 192.0 | 144.0 | 336.0 | 188.0 | 3.0 | 3.0 | 6.0 | 6.0 | 3.0 | 
9.0 | 12.0 | 13.5 | 1.5987 | 1.6863 | 3.285 | 2.64260 | 84.27 | 179.67 | 263.94 | 269.240 | 0.0 | 2.67 | 2.67 | 6.215 | 0.0 | 6.0 | 6.0 | 24.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -6.0 | -6.0 | -7.5 | 0.0 | -0.12 | -0.12 | 0.760 | 0.0 | -18.0 | -18.0 | -19.00 | 0.0000 | 0.00000 | 0.0000 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.00 | 0.0 | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.00 | 0.00 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0000 | 0.0000 | 0.00 | 0.0 | 0.000 | 0.000 | 0.0 | 265.2557 | 430.3380 | 631.4712 | 602.9960 | 1244.0 | 1968.0 | 2658.0 | 1940.0 | 33.0 | 60.0 | 84.0 | 84.0 | 144.0 | 312.0 | 324.0 | 264.0 | 15.1977 | 29.9068 | 37.6004 | 33.2692 | -0.550363 | 0.948402 | 0.000007 | -12.903174 | 0.000003 | -8.144317 | 0.111981 | 14.218595 | 1.680640 | 9.163066 | -5.125451 | -5.511444 | 175.569974 | 215.005044 | 39.435070 |
| 1 | 1983.81 | 1254.01 | 5.80566 | 7.13426 | 7.13154 | 0.0 | 0.495493 | 0.58032 | 0.0 | 0.0 | 1.171200 | str_m1_o10_o10_pcu_sym.124 | 1983.81 | 1693.826844 | 0.58032 | 0.495493 | 2.7225 | 22.704 | 20.79 | 63.7065 | 900.0 | 960.0 | 3240.0 | 2700.0 | 1.0 | 4.0 | 6.0 | 12.0 | 16.0 | 40.0 | 84.0 | 108.0 | 1.7161 | 3.8252 | 8.1744 | 11.6328 | 0.0 | -7.16 | -2.7 | -18.81 | 0.0 | 88.0 | 72.0 | 270.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 3.0 | 21.0 | 0.0 | 2.32 | 1.62 | 6.84 | 10.89 | 90.816 | 83.16 | 254.826 | 3600.0 | 3840.0 | 12960.0 | 10800.0 | 4.0 | 16.0 | 24.0 | 48.0 | 64.0 | 160.0 | 336.0 | 432.0 | 6.8644 | 15.3008 | 32.6976 | 46.5312 | 0.0 | -28.64 | -10.8 | -75.24 | 0.0 | 352.0 | 288.0 | 1080.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 24.0 | 12.0 | 84.0 | 0.0 | 9.28 | 6.48 | 27.36 | 11.8336 | 8.772 | 20.6056 | 18.048533 | 64.0 | 48.0 | 112.0 | 205.333333 | 1.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 | 4.0 | 5.000000 | 0.5329 | 0.5621 | 1.095 | 1.210583 | 28.09 | 59.89 | 87.98 | 139.478333 | 0.0 | 0.89 | 0.89 | 1.633333 | 0.0 | 2.0 | 2.0 | -9.666667 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -2.0 | -2.0 | -3.000000 | 0.0 | -0.04 | -0.04 | -0.198333 | 0.0 | -6.0 | -6.0 | -15.716667 | 5.841067 | 5.032000 | 10.064000 | 24.045067 | 816.666667 | 140.000000 | 280.000000 | 991.666667 | 0.666667 | 0.666667 | 1.333333 | 2.666667 | 0.666667 | 2.000000 | 4.000000 | 5.000000 | 0.866400 | 0.585200 | 1.170400 | 2.420600 | 294.000000 | 158.200000 | 316.400000 | 532.700000 | 0.0 | 0.273333 | 0.546667 | -0.230000 | 0.0 | 19.333333 | 38.666667 | 65.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.333333 | -2.666667 | -2.333333 | 0.0 | 0.246667 | 0.493333 | 0.916667 | 0.0 | 6.466667 | 12.933333 | 30.633333 | 97.734333 | 159.137000 | 240.332400 | 244.392333 | 1651.333333 | 1096.000000 | 1756.000000 | 3195.333333 | 12.000000 | 22.000000 | 32.000000 | 32.000000 | 54.000000 | 118.000000 | 128.000000 | 112.000000 | 6.517500 | 11.929867 | 16.159333 | 17.527267 | 35.5008 | 26.316 | 61.8168 | 54.1456 | 192.0 | 144.0 | 336.0 | 
616.0 | 3.0 | 3.0 | 6.0 | 6.0 | 3.0 | 9.0 | 12.0 | 15.0 | 1.5987 | 1.6863 | 3.285 | 3.63175 | 84.27 | 179.67 | 263.94 | 418.435 | 0.0 | 2.67 | 2.67 | 4.900 | 0.0 | 6.0 | 6.0 | -29.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -6.0 | -6.0 | -9.0 | 0.0 | -0.12 | -0.12 | -0.595 | 0.0 | -18.0 | -18.0 | -47.15 | 17.5232 | 15.09600 | 30.1920 | 72.13520 | 2450.0 | 420.0 | 840.0 | 2975.0 | 2.0 | 2.0 | 4.0 | 8.0 | 2.0 | 6.0 | 12.0 | 15.0 | 2.599200 | 1.755600 | 3.511200 | 7.2618 | 882.0000 | 474.6000 | 949.2000 | 1598.10 | 0.0 | 0.8200 | 1.6400 | -0.6900 | 0.0 | 58.00 | 116.00 | 195.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -4.0 | -8.0 | -7.0 | 0.0 | 0.7400 | 1.4800 | 2.75 | 0.0 | 19.400 | 38.800 | 91.9 | 293.2030 | 477.4110 | 720.9972 | 733.1770 | 4954.0 | 3288.0 | 5268.0 | 9586.0 | 36.0 | 66.0 | 96.0 | 96.0 | 162.0 | 354.0 | 384.0 | 336.0 | 19.5525 | 35.7896 | 48.4780 | 52.5818 | -0.542509 | 1.022895 | 0.000031 | -23.395284 | 0.000003 | -10.208005 | 0.481625 | 9.312424 | 1.513152 | 5.908356 | -4.502967 | -5.505947 | 143.616349 | 193.059644 | 49.443295 |
| 2 | 2259.13 | 1250.58 | 5.99131 | 8.01682 | 7.98933 | 0.0 | 0.728036 | 0.65710 | 0.0 | 0.0 | 0.902566 | str_m1_o10_o10_pcu_sym.183 | 2259.13 | 2503.008090 | 0.65710 | 0.728036 | 2.7225 | 22.704 | 20.79 | 63.7065 | 900.0 | 960.0 | 3240.0 | 2700.0 | 1.0 | 4.0 | 6.0 | 12.0 | 16.0 | 40.0 | 84.0 | 108.0 | 1.7161 | 3.8252 | 8.1744 | 11.6328 | 0.0 | -7.16 | -2.7 | -18.81 | 0.0 | 88.0 | 72.0 | 270.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 3.0 | 21.0 | 0.0 | 2.32 | 1.62 | 6.84 | 10.89 | 90.816 | 83.16 | 254.826 | 3600.0 | 3840.0 | 12960.0 | 10800.0 | 4.0 | 16.0 | 24.0 | 48.0 | 64.0 | 160.0 | 336.0 | 432.0 | 6.8644 | 15.3008 | 32.6976 | 46.5312 | 0.0 | -28.64 | -10.8 | -75.24 | 0.0 | 352.0 | 288.0 | 1080.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 24.0 | 12.0 | 84.0 | 0.0 | 9.28 | 6.48 | 27.36 | 11.8336 | 8.772 | 20.6056 | 17.962533 | 64.0 | 48.0 | 112.0 | 81.333333 | 1.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 | 4.0 | 4.666667 | 0.5329 | 0.5621 | 1.095 | 0.968467 | 28.09 | 59.89 | 87.98 | 91.160000 | 0.0 | 0.89 | 0.89 | 1.658333 | 0.0 | 2.0 | 2.0 | 5.833333 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -2.0 | -2.0 | -2.666667 | 0.0 | -0.04 | -0.04 | 0.133333 | 0.0 | -6.0 | -6.0 | -6.600000 | 7.889067 | 9.632000 | 11.495333 | 23.128267 | 42.666667 | 36.000000 | 57.333333 | 114.666667 | 0.666667 | 1.166667 | 1.333333 | 2.333333 | 2.166667 | 4.500000 | 6.666667 | 7.666667 | 0.355267 | 0.509783 | 0.700800 | 1.185033 | 18.726667 | 51.851667 | 73.846667 | 95.930000 | 0.0 | 1.213333 | 1.245000 | 1.303333 | 0.0 | 4.833333 | 3.500000 | 4.333333 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.333333 | -1.333333 | 0.000000 | 0.0 | 0.153333 | 0.013333 | 0.080000 | 0.0 | -3.600000 | -6.866667 | -5.733333 | 97.025500 | 156.120667 | 220.135733 | 232.212267 | 478.000000 | 746.666667 | 1033.333333 | 948.666667 | 11.333333 | 20.666667 | 27.333333 | 29.333333 | 48.000000 | 104.666667 | 112.666667 | 97.333333 | 5.507533 | 10.693533 | 13.553667 | 13.183200 | 35.5008 | 26.316 | 61.8168 | 53.8876 | 192.0 | 144.0 | 336.0 | 244.0 | 3.0 | 3.0 | 
6.0 | 6.0 | 3.0 | 9.0 | 12.0 | 14.0 | 1.5987 | 1.6863 | 3.285 | 2.90540 | 84.27 | 179.67 | 263.94 | 273.480 | 0.0 | 2.67 | 2.67 | 4.975 | 0.0 | 6.0 | 6.0 | 17.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -6.0 | -6.0 | -8.0 | 0.0 | -0.12 | -0.12 | 0.400 | 0.0 | -18.0 | -18.0 | -19.80 | 23.6672 | 28.89600 | 34.4860 | 69.38480 | 128.0 | 108.0 | 172.0 | 344.0 | 2.0 | 3.5 | 4.0 | 7.0 | 6.5 | 13.5 | 20.0 | 23.0 | 1.065800 | 1.529350 | 2.102400 | 3.5551 | 56.1800 | 155.5550 | 221.5400 | 287.79 | 0.0 | 3.6400 | 3.7350 | 3.9100 | 0.0 | 14.50 | 10.50 | 13.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | -4.0 | 0.0 | 0.0 | 0.4600 | 0.0400 | 0.24 | 0.0 | -10.800 | -20.600 | -17.2 | 291.0765 | 468.3620 | 660.4072 | 696.6368 | 1434.0 | 2240.0 | 3100.0 | 2846.0 | 34.0 | 62.0 | 82.0 | 88.0 | 144.0 | 314.0 | 338.0 | 292.0 | 16.5226 | 32.0806 | 40.6610 | 39.5496 | -0.758790 | 1.029985 | 0.000037 | -27.155329 | 0.000003 | -8.479801 | 0.401683 | 14.796071 | 1.569714 | 7.933198 | -4.433968 | -5.525707 | 160.238808 | 199.765744 | 39.526937 |
| 3 | 1424.54 | 1249.27 | 4.73477 | 7.05822 | 7.05822 | 0.0 | 0.453157 | 0.47338 | 0.0 | 0.0 | 1.044630 | str_m1_o10_o10_pcu_sym.184 | 1424.54 | 1363.679006 | 0.47338 | 0.453157 | 2.7225 | 22.704 | 20.79 | 63.7065 | 900.0 | 960.0 | 3240.0 | 2700.0 | 1.0 | 4.0 | 6.0 | 12.0 | 16.0 | 40.0 | 84.0 | 108.0 | 1.7161 | 3.8252 | 8.1744 | 11.6328 | 0.0 | -7.16 | -2.7 | -18.81 | 0.0 | 88.0 | 72.0 | 270.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 3.0 | 21.0 | 0.0 | 2.32 | 1.62 | 6.84 | 10.89 | 90.816 | 83.16 | 254.826 | 3600.0 | 3840.0 | 12960.0 | 10800.0 | 4.0 | 16.0 | 24.0 | 48.0 | 64.0 | 160.0 | 336.0 | 432.0 | 6.8644 | 15.3008 | 32.6976 | 46.5312 | 0.0 | -28.64 | -10.8 | -75.24 | 0.0 | 352.0 | 288.0 | 1080.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 24.0 | 12.0 | 84.0 | 0.0 | 9.28 | 6.48 | 27.36 | 11.8336 | 8.772 | 20.6056 | 18.673467 | 64.0 | 48.0 | 112.0 | 90.666667 | 1.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 | 4.0 | 4.833333 | 0.5329 | 0.5621 | 1.095 | 1.012267 | 28.09 | 59.89 | 87.98 | 91.866667 | 0.0 | 0.89 | 0.89 | 1.451667 | 0.0 | 2.0 | 2.0 | 4.666667 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -2.0 | -2.0 | -2.833333 | 0.0 | -0.04 | -0.04 | 0.073333 | 0.0 | -6.0 | -6.0 | -6.733333 | 11.833600 | 17.544000 | 40.248000 | 40.408533 | 64.000000 | 96.000000 | 120.000000 | 197.333333 | 1.000000 | 2.000000 | 5.000000 | 4.000000 | 4.000000 | 14.000000 | 18.000000 | 13.333333 | 0.532900 | 1.124200 | 1.934500 | 1.995333 | 28.090000 | 119.780000 | 191.330000 | 151.933333 | 0.0 | 1.780000 | 5.500000 | 2.013333 | 0.0 | 4.000000 | 25.000000 | 7.333333 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -3.000000 | 1.000000 | 1.333333 | 0.0 | -0.080000 | 1.000000 | 0.186667 | 0.0 | -12.000000 | -9.600000 | -7.466667 | 130.498833 | 234.464000 | 361.771400 | 364.162067 | 577.333333 | 1024.000000 | 1398.000000 | 1414.666667 | 17.333333 | 33.333333 | 50.000000 | 50.666667 | 83.333333 | 186.000000 | 218.000000 | 232.000000 | 7.544933 | 16.108400 | 21.487933 | 22.200800 | 35.5008 | 26.316 | 61.8168 | 56.0204 | 192.0 | 144.0 | 336.0 | 272.0 
| 3.0 | 3.0 | 6.0 | 6.0 | 3.0 | 9.0 | 12.0 | 14.5 | 1.5987 | 1.6863 | 3.285 | 3.03680 | 84.27 | 179.67 | 263.94 | 275.600 | 0.0 | 2.67 | 2.67 | 4.355 | 0.0 | 6.0 | 6.0 | 14.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -6.0 | -6.0 | -8.5 | 0.0 | -0.12 | -0.12 | 0.220 | 0.0 | -18.0 | -18.0 | -20.20 | 35.5008 | 52.63200 | 120.7440 | 121.22560 | 192.0 | 288.0 | 360.0 | 592.0 | 3.0 | 6.0 | 15.0 | 12.0 | 12.0 | 42.0 | 54.0 | 40.0 | 1.598700 | 3.372600 | 5.803500 | 5.9860 | 84.2700 | 359.3400 | 573.9900 | 455.80 | 0.0 | 5.3400 | 16.5000 | 6.0400 | 0.0 | 12.00 | 75.00 | 22.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -9.0 | 3.0 | 4.0 | 0.0 | -0.2400 | 3.0000 | 0.56 | 0.0 | -36.000 | -28.800 | -22.4 | 391.4965 | 703.3920 | 1085.3142 | 1092.4862 | 1732.0 | 3072.0 | 4194.0 | 4244.0 | 52.0 | 100.0 | 150.0 | 152.0 | 250.0 | 558.0 | 654.0 | 696.0 | 22.6348 | 48.3252 | 64.4638 | 66.6024 | -0.668312 | 0.994048 | 0.000073 | -29.916546 | 0.000005 | -12.615382 | 0.821747 | 10.816880 | 2.161833 | 6.710778 | -4.135434 | -5.297082 | 132.576623 | 195.582107 | 63.005483 |
| 4 | 2228.31 | 1250.61 | 6.40783 | 8.35944 | 8.26946 | 0.0 | 0.700539 | 0.65092 | 0.0 | 0.0 | 0.929170 | str_m1_o10_o10_pcu_sym.189 | 2228.31 | 2398.172563 | 0.65092 | 0.700539 | 2.7225 | 22.704 | 20.79 | 63.7065 | 900.0 | 960.0 | 3240.0 | 2700.0 | 1.0 | 4.0 | 6.0 | 12.0 | 16.0 | 40.0 | 84.0 | 108.0 | 1.7161 | 3.8252 | 8.1744 | 11.6328 | 0.0 | -7.16 | -2.7 | -18.81 | 0.0 | 88.0 | 72.0 | 270.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 3.0 | 21.0 | 0.0 | 2.32 | 1.62 | 6.84 | 10.89 | 90.816 | 83.16 | 254.826 | 3600.0 | 3840.0 | 12960.0 | 10800.0 | 4.0 | 16.0 | 24.0 | 48.0 | 64.0 | 160.0 | 336.0 | 432.0 | 6.8644 | 15.3008 | 32.6976 | 46.5312 | 0.0 | -28.64 | -10.8 | -75.24 | 0.0 | 352.0 | 288.0 | 1080.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 24.0 | 12.0 | 84.0 | 0.0 | 9.28 | 6.48 | 27.36 | 11.8336 | 8.772 | 20.6056 | 16.557867 | 64.0 | 48.0 | 112.0 | 76.000000 | 1.0 | 1.0 | 2.0 | 2.0 | 1.0 | 3.0 | 4.0 | 4.500000 | 0.5329 | 0.5621 | 1.095 | 0.911283 | 28.09 | 59.89 | 87.98 | 96.901667 | 0.0 | 0.89 | 0.89 | 2.066667 | 0.0 | 2.0 | 2.0 | 6.500000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -2.0 | -2.0 | -2.500000 | 0.0 | -0.04 | -0.04 | 0.211667 | 0.0 | -6.0 | -6.0 | -7.683333 | 3.513100 | 5.616517 | 9.679300 | 8.147783 | 37.333333 | 72.666667 | 61.333333 | 56.000000 | 0.333333 | 0.666667 | 1.000000 | 1.000000 | 1.833333 | 3.833333 | 4.166667 | 4.166667 | 0.219925 | 0.460258 | 0.569325 | 0.547067 | 38.385833 | 71.665833 | 72.829167 | 76.600000 | 0.0 | 0.105833 | 0.259167 | 0.542500 | 0.0 | 1.416667 | 3.416667 | 5.583333 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.166667 | 0.000000 | 0.000000 | 0.0 | 0.050833 | 0.085833 | 0.163333 | 0.0 | 0.741667 | 1.091667 | 1.400000 | 97.076800 | 155.072733 | 234.687600 | 227.709933 | 551.333333 | 945.333333 | 1186.666667 | 982.666667 | 11.333333 | 20.666667 | 28.666667 | 30.000000 | 49.333333 | 108.000000 | 114.000000 | 95.333333 | 5.656700 | 11.196733 | 14.388133 | 13.181933 | 35.5008 | 26.316 | 61.8168 | 49.6736 | 192.0 | 144.0 | 336.0 | 228.0 | 3.0 | 3.0 | 6.0 | 
6.0 | 3.0 | 9.0 | 12.0 | 13.5 | 1.5987 | 1.6863 | 3.285 | 2.73385 | 84.27 | 179.67 | 263.94 | 290.705 | 0.0 | 2.67 | 2.67 | 6.200 | 0.0 | 6.0 | 6.0 | 19.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -6.0 | -6.0 | -7.5 | 0.0 | -0.12 | -0.12 | 0.635 | 0.0 | -18.0 | -18.0 | -23.05 | 10.5393 | 16.84955 | 29.0379 | 24.44335 | 112.0 | 218.0 | 184.0 | 168.0 | 1.0 | 2.0 | 3.0 | 3.0 | 5.5 | 11.5 | 12.5 | 12.5 | 0.659775 | 1.380775 | 1.707975 | 1.6412 | 115.1575 | 214.9975 | 218.4875 | 229.80 | 0.0 | 0.3175 | 0.7775 | 1.6275 | 0.0 | 4.25 | 10.25 | 16.75 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.5 | 0.0 | 0.0 | 0.0 | 0.1525 | 0.2575 | 0.49 | 0.0 | 2.225 | 3.275 | 4.2 | 291.2304 | 465.2182 | 704.0628 | 683.1298 | 1654.0 | 2836.0 | 3560.0 | 2948.0 | 34.0 | 62.0 | 86.0 | 90.0 | 148.0 | 324.0 | 342.0 | 286.0 | 16.9701 | 33.5902 | 43.1644 | 39.5458 | -0.648170 | 1.064184 | 0.000017 | -17.669823 | 0.000003 | -8.743404 | 0.258905 | 14.153999 | 1.653013 | 8.272621 | -4.774301 | -5.515219 | 171.601539 | 214.452966 | 42.851427 |
Click here for a hint
- Use something like
`pd.options.display.max_columns = 100` to adjust how many columns are shown. Setting it to 100 would display at most 100 columns.
Let's also get some basic information ...
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 17379 entries, 0 to 17378 Columns: 343 entries, ASA [m^2/cm^3] to CH4LPSTP dtypes: float64(342), object(1) memory usage: 45.5+ MB
$\color{DarkBlue}{\textsf{Short question}}$
- How many materials are in the dataset?
Answer: The dataset is made up of 17 379 entries, or materials.
- Which datatypes do we deal with?
Answer: We deal with 342 float64 columns and 1 object column.
Below, we define three global variables (hence upper case), which are the names of our feature and target columns. We will use the TARGET for the actual regression and the TARGET_BINARY only for the stratified train/test split. The FEATURES variable is a list of column names of our dataframe.
TARGET = "pure_uptake_CO2_298.00_1600000"
TARGET_BINARY = "target_binned" # will be created later
FEATURES = (
geometric_descriptors
+ summed_functionalgroup_descriptors
+ summed_linker_descriptors
+ summed_metalcenter_descriptors
)
As descriptors we will use geometric properties such as density, pore volume, etc. and revised autocorrelation functions (RACs) that have been optimized for describing inorganic compounds (and recently adapted for MOFs).
Examples for pore geometry descriptors (in geometric_descriptors) include: $D_i$ (the size of the largest included sphere), $D_f$ (the largest free sphere), and $D_{if}$ (the largest included free sphere) along the pore $-$ three ways of characterizing pore size.

Also included are the surface area (SA) of the pore, and the probe-occupiable pore volume (POV). More details on the description of pore geometries can be found in Ongari et al.
RACs (in the lists starting with summed_...) operate on the structure graph and encode information about the metal center, linkers and the functional groups as differences or products of heuristics that are relevant for inorganic chemistry, such as electronegativity ($\chi$), connectivity ($T$), identity ($I$), covalent radii ($S$), and nuclear charge ($Z$).

The number in the descriptornames shows the coordination shell that was considered in the calculation of the RACs.
The target we use for this application is the high-pressure CO$_2$ uptake. This is the amount of CO$_2$ (mmol) the MOF can load per gram.
2. Split the data¶
Next, we split our data into a training set and a test set.
In order to prevent any information of the test set from leaking into our model, we split before starting to analyze or transform our data. For more details on why this matters, see chapter 7.10.2 of Elements of Statistical Learning.
2.1. Split with stratification¶
Stratification ensures that the class distributions (ratio of "good" to "bad" materials) are the same in the training and test set.
$\color{DarkBlue}{\textsf{Short question}}$
- Why is this important? What could happen if we would not do this?
Answer: Stratification helps train the model with the same ratio of classes that will be found in the test set. If stratification is not performed — that is, if one of the two sets, for instance the test set, has more "good" materials — the model may appear to perform well but would fail on "bad" materials. The same could happen the other way round, yielding poor model performance because the model was only trained on "bad" materials.
For stratification to work, we need to define what makes a "good" or a "bad" material. We will use 15 mmol CO$_2$ / g as the threshold for the uptake, thus binarizing our continuous target variable. (You can choose it based on the histogram of the variable.)
$\color{DarkBlue}{\textsf{Short Exercise}}$
- add a column 'target_binary' that encodes whether a material is low performing (
0) or high performing (1) by comparing the uptake with the `THRESHOLD`
Click here for a hint
- you can use pd.cut, list comprehension, the binarizer in sklearn ...)
- a list comprehension example:
[1 if value > THRESHOLD else 0 for value in df[TARGET]]
THRESHOLD = 15 # in units of mmol CO2/g
df[TARGET_BINARY] = [1 if value > THRESHOLD else 0 for value in df[TARGET]]
Now, we can perform the actual split into training and test set.
$\color{DarkBlue}{\textsf{Short Exercise}}$¶
- select reasonable values for
`XX` and `XY` and then perform the test/train splits. What do you consider when making this decision (think about what you would do with really small and really big datasets, what happens if you have only one test point, what happens to the model performance if you have more test points than training points)?
Answer: In order to choose the split ratio for the train and test sets, one has to consider the number of data points. For a big data set, one can increase the size of the training set and reduce the size of the test set. This will ensure that the model is more accurate as it is trained on a lot of data and there would still be enough data to evaluate its performance. However, for a small data set, it’s crucial to maintain a balance. One should have enough data to train the model while keeping a sufficient amount of data to evaluate its performance.
- why do we need to perform the split into a training and test set?
Answer: Because a training set is needed to train the model, helping it learn patterns and relationships within the data. Whereas a test set is needed to see how well the model will perform on real-world data, i.e. if the model is performant.
- would we use the test set to tune the hyperparameters of our model?
Answer: No, one should not use the test set to tune the hyperparameters of the model. The model would be indirectly optimized to perform well on the test set rather than on truly unseen data. The test data set should only be used at the final evaluation of the model, so that an unbiased assessement of its performance is given.
Click here for a hint
- The `size` arguments can either be integers or, often more convenient, decimals like 0.1
- When you perform the split into training and test set you need to trade-off bias (pessimistic bias due to little training data) and variance (due to little test data)
- A typical split could be 70/30, but for huge datasets the test set might be too big and for small datasets the training set might be too small in this way
df_train_stratified, df_test_stratified = train_test_split(
df,
train_size=0.7,
test_size=0.3,
random_state=RANDOM_SEED,
stratify=df[TARGET_BINARY],
)
3. Exploratory data analysis (EDA)¶
After we have put the test set aside, we can give the training set a closer look.
3.1. Correlations¶
$\color{DarkBlue}{\textsf{Short Exercise}}$
- Plot some features against the target property and calculate the Pearson and Spearman correlation coefficient (what is the difference between those correlation coefficients?)
$\color{Green}{\textsf{Answer}}$: The Pearson and Spearman correlation coefficients both measure the strength and direction of a relationship between two variables. The Pearson correlation coefficient is designed to measure linear relationships and relies on the covariance of continuous, normally distributed variables. It’s sensitive to outliers, as it depends on actual data values. It produces a value of +1 for a perfect positive linear relationship, -1 for a perfect negative linear relationship, and 0 for no linear association. In contrast, the Spearman correlation coefficient measures the strength of monotonic relationships by using ranked values, making it a non-parametric measure suitable for ordinal data or data with outliers. A Spearman coefficient of +1 or -1 indicates a perfect monotonic relationship, while 0 signifies no monotonic association, making it more robust for datasets that do not meet normality assumptions. Therefore, the major difference between the two coefficients is the type of relationship they measure.
- What are the strongest correlations? Did you expect them?
$\color{Green}{\textsf{Answer}}$: The Spearman correlation coefficients are greater in all three graphs compared to the Pearson correlation coefficient. When the Spearman correlation coefficient is greater than the Pearson correlation coefficient, it suggests a consistent but non-linear relationship between the variables. Listed below are the Spearman correlation coefficients of each graph.
- Spearman Correlation:
- density [g/cm3]: -0.948727
- ASA [m2/cm3]: 0.859978
- POAV [cm3/g]: 0.923432
One can see that the density and the pure_uptake_CO2_298.00_1600000 have the strongest negative correlation coefficient. Specifically, this suggests that as the density of the MOF increases, the amount of carbon dioxide (CO₂) it can adsorb (measured as pure uptake at 298 K and a pressure of 1,600,000 Pa) tends to decrease.
This inverse correlation may indicate that denser MOFs have less available pore space or fewer active sites for CO₂ adsorption, resulting in lower uptake capacity. In other words, the structural characteristics that contribute to higher density might limit the MOF's effectiveness in capturing CO₂, which is an important consideration in designing materials for carbon capture applications. Understanding this relationship can help guide the development of MOFs that optimize both density and CO₂ uptake for better performance in environmental and industrial applications.
Similarly, the POAV and the pure_uptake_CO2_298.00_1600000 are strongly correlated. However, here we have a positive correlation coefficient, indicating that both increase together.
In the context of Metal-Organic Frameworks (MOFs), a higher POAV typically means that there is more available space within the structure for gas molecules to enter and be retained. This increased availability can enhance the adsorption capacity of CO₂. Therefore, a strong correlation between POAV and CO₂ uptake indicates that the structural features of the MOF, which allow for a larger volume of accessible space, are critical for optimizing its performance in capturing carbon dioxide.
And finally, we see a moderate positive correlation between the ASA and the pure_uptake_CO2_298.00_1600000. This relationship indicates that a larger ASA provides more active sites for the interaction and adsorption of CO₂ molecules. A greater surface area typically allows for more contact between the gas and the solid, enhancing the material's overall capacity to capture carbon dioxide. However, since the correlation is described as moderate, it implies that while there is a tendency for increased CO₂ uptake with larger surface areas, other factors may also play a significant role in influencing the adsorption process.
- What can be a problem when features are correlated?
$\color{Green}{\textsf{Answer}}$: Correlated features in a dataset can lead to several issues including multicollinearity - which makes it hard to determine how much each feature affects the outcome because they are too similar. This can also result in overfitting, where the model learns from noise in the data instead of the real patterns, making it less accurate on new data. Additionally, having correlated features can create redundancy, meaning that some features provide the same information, which can complicate the model unnecessarily. It can also make it harder to understand the model, as it becomes difficult to know which feature is really influencing the outcome. Lastly, it can slow down the training process for the model because the presence of correlated features can make it harder for optimization algorithms to work effectively.
- Optional: Do they change if you switch from CO$_2$ to CH$_4$ uptake as the target instead? Explain your observation.
$\color{Green}{\textsf{Answer}}$:
TO DO : SWITCH TO CH4
To get the correlation matrices, you can use the `df.corr(method=)` method on your dataframe (`df`). You might want to calculate not the full correlation matrix but just the correlation of the features with the targets.
Click here for a hint
- To get the correlation with a target, you can use indexing. E.g.
df.corr(method='spearman')[TARGET] - use
.sort_values()method on the output of `df.corr()` to sort by the value of the correlation coefficient - You can use something like
scatter = hv.Scatter(df, 'Di', [TARGET, 'density [g/cm^3]']).opts(color='density [g/cm^3]', cmap='rainbow')for plotting. Also consider theholoviewsdocumentation. In caseholoviewsis too new for you, you can of course just usematplotliband something likeplt.scatter(x,y)
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource
from bokeh.layouts import row

# Enable inline Bokeh output when running inside a Jupyter notebook.
output_notebook()

# Features to compare against the high-pressure CO2 uptake target.
# NOTE(review): assumes `df` (defined in an earlier cell) holds these columns.
features = ['ASA [m^2/cm^3]', 'density [g/cm^3]', 'POAV [cm^3/g]']
target = 'pure_uptake_CO2_298.00_1600000'

# Collect one scatter plot per feature for a side-by-side layout.
plots = []
for feature in features:
    # Linear (Pearson) and rank-based (Spearman) correlation with the target;
    # the second return value (the p-value) is discarded.
    pearson_corr, _ = pearsonr(df[feature], df[target])
    spearman_corr, _ = spearmanr(df[feature], df[target])
    print(f'Feature: {feature}')
    print(f' Pearson correlation: {pearson_corr}')
    print(f' Spearman correlation: {spearman_corr}')

    # Scatter plot of the feature against the target.
    source = ColumnDataSource(data=dict(x=df[feature], y=df[target]))
    p = figure(title=f'{feature} vs {target}',
               x_axis_label=feature,
               y_axis_label=target,
               width=400,
               height=400)
    p.scatter('x', 'y', source=source, alpha=0.6)
    p.grid.grid_line_alpha = 0.3
    plots.append(p)

# Render all plots next to each other.
show(row(*plots))
Feature: ASA [m^2/cm^3] Pearson correlation: 0.8275290072484319 Spearman correlation: 0.8599783195386285 Feature: density [g/cm^3] Pearson correlation: -0.8846360549246102 Spearman correlation: -0.9487270052266709 Feature: POAV [cm^3/g] Pearson correlation: 0.7867490758533948 Spearman correlation: 0.9234319927981253
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource
from bokeh.layouts import row

# Enable inline Bokeh output when running inside a Jupyter notebook.
output_notebook()

# Same features as before, but now correlated with the methane uptake
# target (optional part of the exercise).
features = ['ASA [m^2/cm^3]', 'density [g/cm^3]', 'POAV [cm^3/g]']
# Alternative low-pressure methane target, kept for reference:
#target = 'pure_uptake_methane_298.00_580000'
target= 'pure_uptake_methane_298.00_6500000'

plots = []
for col in features:
    # Correlation coefficients of the feature with the CH4 uptake target;
    # p-values are discarded.
    r_pearson, _ = pearsonr(df[col], df[target])
    r_spearman, _ = spearmanr(df[col], df[target])
    print(f'Feature: {col}')
    print(f' Pearson correlation: {r_pearson}')
    print(f' Spearman correlation: {r_spearman}')

    # Scatter plot of the feature against the target.
    data_source = ColumnDataSource(data=dict(x=df[col], y=df[target]))
    fig = figure(title=f'{col} vs {target}',
                 x_axis_label=col,
                 y_axis_label=target,
                 width=400,
                 height=400)
    fig.scatter('x', 'y', source=data_source, alpha=0.6)
    fig.grid.grid_line_alpha = 0.3
    plots.append(fig)

# Render all plots next to each other.
show(row(*plots))
Feature: ASA [m^2/cm^3] Pearson correlation: 0.7500077219792253 Spearman correlation: 0.8152596047201417 Feature: density [g/cm^3] Pearson correlation: -0.9098151180848304 Spearman correlation: -0.9749192006449127 Feature: POAV [cm^3/g] Pearson correlation: 0.9261379542223647 Spearman correlation: 0.9391612000914062
4. Baselines¶
For machine learning, it is important to have some baselines to which one then compares the results of a model. Think of a classification model for some rare disease where we only have 1% positives. A classification model that only predicts the negatives all the time will still have an amazingly high accuracy. To be able to understand if our model is really better than such a simple prediction, we need to make the simple prediction first. This is what we call a baseline.
A baseline could be a really simple model, a basic heuristic or the current state of the art. We will use a heuristic.
For this we use sklearn Dummy objects that simply calculate the mean, the median or the most frequent case of the training set, when you run the fit() method on them (which takes the features matrix $\mathbf{X}$ and the labels $\mathbf{y}$ as arguments).
That is, the prediction of a DummyRegressor with mean strategy will always be the mean, independent of the input (it will not look at the feature matrix!).
Instead of using those sklearn objects you could also just manually compute the mean or median of the dataset. But we will use those objects as we can learn in this way how to use estimators in sklearn, and it also allows you to test your full pipeline with different (baseline) models.
What does this mean? In practice this means that you can use all the regression and classification models shown in the figure below in the same way, they will all have a fit() method that accepts X and y and a predict method that accepts X and returns the predictions.

The estimator objects can be always used in the same way

Using these objects, instead of the mean directly, allows you to easily swap them with other models in pipelines, where one chains many data transformation steps (see section 6).
4.1. Build dummy models¶
$\color{DarkBlue}{\textsf{Short Question}}$
- If you call
.fit(X, y)on aDummyRegressordoes it actually use theX? If not, why is there still the place for theXin the function? If yes, how does it use it?
Answer: The DummyRegressor does not use the X to learn patterns. `X` is still included to keep the estimator API consistent and to check that the dimensions of X and y match.
$\color{DarkBlue}{\textsf{Short Exercise}}$
- Create
DummyRegressorinstances formean,median. (e.g.dummyinstance = DummyRegressor(strategy='mean')) - Train them on the training data (
dummyinstance.fit(df_train[FEATURES], df_train[TARGET]))
Click here for hints
- to create
DummyRegressoryou can for example usedummyregressor_mean = DummyRegressor(strategy='mean') - to see the implementation of the
DummyRegressoryou can check out the source code on GitHub
# Build the baseline regressors: they ignore the features entirely and
# always predict one constant derived from the training targets.
dummyregressor_mean = DummyRegressor(strategy='mean')
dummyregressor_median = DummyRegressor(strategy='median')

# Fitting only records the mean/median of the training targets.
for baseline in (dummyregressor_mean, dummyregressor_median):
    baseline.fit(df_train_stratified[FEATURES], df_train_stratified[TARGET])
DummyRegressor(strategy='median')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DummyRegressor(strategy='median')
Evaluate the performance of the dummy models¶
$\color{DarkBlue}{\textsf{Short Exercise}}$
- Calculate maximum error, mean absolute error and mean square error for the dummy regressors on training and test set. What would you expect those numbers to be?
Answer: Large values are expected as only simple mathematical relations are used to fit the data.
- Do the actual values surprise you?
Answer:
The found values for the maximum error, mean absolute error and mean square error for the mean dummy regressors are:
- For the training set: 'mae': 6.332682697746154, 'mse': 54.36178484567787, 'max_error': 22.60380294886913
- For the testing set: 'mae': 6.27366716404819, 'mse': 53.03274968687297, 'max_error': 22.680388292469132
The found values for the maximum error, mean absolute error and mean square error for the median dummy regressors are:
- For the training set: 'mae': 6.260356674045895, 'mse': 56.256641800406506, 'max_error': 23.980340981299996
- For the testing set: 'mae': 6.183402451368086, 'mse': 54.69101599465944, 'max_error': 24.056926324899997
As the values shown above are large, it does not surprise us.
- What does this mean in practice for reporting of metrics/the reasoning behind using baseline models
Answer: The high errors from the DummyRegressor are expected, as it only predicts simple values like the mean or median without using any features. These baseline errors help us see if a more complex model actually learns useful patterns. If a model doesn’t perform better than the dummy, it likely isn’t effective.
It can be handy to store our metrics of choice in a nested dictionary (Python dictionaries are key-value pairs):
{
'dummyestimator1': {
'metric_a_key': metric_a_value,
'metric_b_key': metric_b_value
},
'dummyestimator2': {
'metric_a_key': metric_a_value,
'metric_b_key': metric_b_value
},
}
You will now write functions get_regression_metrics(model, X, y_true) that compute the metrics and return this dictionary for a given model. The predict method takes the feature matrix $\mathbf{X}$ as input.
In them, we calculate
$\mathrm {MAE} =\frac{\sum _{i=1}^{n}\left|Y_{i}-\hat{y}_{i}\right|}{n}.$
and
$\mathrm {MSE} = {\frac {1}{n}}\sum _{i=1}^{n}(Y_{i}-{\hat {Y_{i}}})^{2}.$
where $\hat{y}$ are the predictions and, $Y_{i}$ the true values.
as well as the maximum error.
Click here for hints
- to perform a prediction using a estimator object, you can call
classifier.predict(X) - to calculate metrics, you can for example call
accuracy_score(true_values, predicted_values)
def get_regression_metrics(model, X, y_true):
    """Return a dictionary of regression metrics for a fitted model.

    model: sklearn-style estimator exposing a ``predict`` method
    X: feature matrix to predict on
    y_true: ground-truth target values

    Returns a dict with keys ``'mae'``, ``'mse'`` and ``'max_error'``.
    """
    predictions = model.predict(X)
    return {
        'mae': mean_absolute_error(y_true, predictions),
        'mse': mean_squared_error(y_true, predictions),
        'max_error': max_error(y_true, predictions),
    }
# Pair each baseline with a readable name so results can be keyed by it.
dummy_regressors = [
    ('mean', dummyregressor_mean),
    ('median', dummyregressor_median)
]

# Nested dicts: {regressor name: {metric name: value}}, one per split.
dummy_regressor_results_test = {}
dummy_regressor_results_train = {}

# Each (name, estimator) tuple is unpacked directly in the for statement.
for regressorname, regressor in dummy_regressors:
    print(f"Calculating metrics for {regressorname}")
    dummy_regressor_results_test[regressorname] = get_regression_metrics(
        regressor, df_test_stratified[FEATURES], df_test_stratified[TARGET])
    dummy_regressor_results_train[regressorname] = get_regression_metrics(
        regressor, df_train_stratified[FEATURES], df_train_stratified[TARGET])

print("Training Metrics:", dummy_regressor_results_train)
print("Testing Metrics:", dummy_regressor_results_test)
Calculating metrics for mean
Calculating metrics for median
Training Metrics: {'mean': {'mae': np.float64(6.332682697746154), 'mse': np.float64(54.36178484567787), 'max_error': np.float64(22.60380294886913)}, 'median': {'mae': np.float64(6.260356674045895), 'mse': np.float64(56.256641800406506), 'max_error': np.float64(23.980340981299996)}}
Testing Metrics: {'mean': {'mae': np.float64(6.27366716404819), 'mse': np.float64(53.03274968687297), 'max_error': np.float64(22.680388292469132)}, 'median': {'mae': np.float64(6.183402451368086), 'mse': np.float64(54.69101599465944), 'max_error': np.float64(24.056926324899997)}}
5. Build actual regression models¶
Let's build a simple kernel ridge regression (KRR) machine learning model and train it with our raw data. You can try different kernels, but we recommend to start with the Gaussian radial basis function ('rbf') kernel.
$\color{DarkBlue}{\textsf{Short Question}}$
- Do you expect this model to perform better than the dummy models?
Answer: Yes, one expects this model to be better than the dummy model because it appears to be more advanced and complex.
- Train it and then calculate the performance metrics on the training and test set. How do they compare to the performance of the dummy models?
Answer: The calculated values for the maximum error, mean absolute error and mean square error for this model are:
- For the training set: 'mae': 5.8930721293080754, 'mse': 48.35125984203933, 'max_error': 17.207763399500003
- For the testing set: 'mae': 11.692068235569346, 'mse': 189.94569670655383, 'max_error': 34.4921121426
As it can be seen with the values above, the performance of this model is better for the training set as the values are lower, but worse for the test set.
- What is the shape of the Kernel and of the weights? (you can check your answer by looking at the
dual_coef_attribute of the KRR instance. You can get shapes of objects using theshapeatrribute
Answer: The shape of the Kernel and of the weights is a matrix of shape (nb(training point),). The shape is in this case: (12165,). This value comes from the calculation: 0.7 ⋅ 17379, where 0.7 corresponds to the fraction of the dataset used for the training data and 17379 to the number of MOFs in the data set.
# Kernel ridge regression with a Gaussian (RBF) kernel, trained on the
# stratified training split; fit() returns the estimator itself.
krr = KernelRidge(kernel='rbf').fit(
    df_train_stratified[FEATURES], df_train_stratified[TARGET]
)
KernelRidge(kernel='rbf')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KernelRidge(kernel='rbf')
# Evaluate the fitted KRR model on both splits with the shared helper.
krr_metrics_train = get_regression_metrics(
    krr, df_train_stratified[FEATURES], df_train_stratified[TARGET])
krr_metrics_test = get_regression_metrics(
    krr, df_test_stratified[FEATURES], df_test_stratified[TARGET])

print("Kernel Ridge Regression Metrics on Training Set:")
print(krr_metrics_train)
print()
print("Kernel Ridge Regression Metrics on Test Set:")
print(krr_metrics_test)
Kernel Ridge Regression Metrics on Training Set:
{'mae': np.float64(5.8930721293080754), 'mse': np.float64(48.35125984203933), 'max_error': np.float64(17.207763399500003)}
Kernel Ridge Regression Metrics on Test Set:
{'mae': np.float64(11.692068235569346), 'mse': np.float64(189.94569670655383), 'max_error': np.float64(34.4921121426)}
6. Evaluate the model performance in detail¶
We have trained our first machine learning model! We'll first have a closer look at its performance, before learning how to improve it.
$\color{DarkBlue}{\textsf{Short Exercise}}$
Create a parity plot (true values against predictions) for the training and test data
Plot a histogram of the distribution of the training and test errors on the training and test set. Plot the errors also as a function of the true value
Let's assume we would like to use our model for pre-screening a library of millions of porous materials to zoom in on those with the most promising gas uptake. Could you tolerate the errors of your model?
$\color{Green}{\textsf{Answer}}$: The error of the test set is too big, therefore it can't be tolerated. Thus, if we predict the MOF's capability to adsorb CO2 , the prediction would be skewed and our pre-screening would not work.
- Compare the parity plots for this model with the ones for the dummy models. Use the plotting functions below to evaluate all the following models you train.
For this exercise, it can be handy to save the results in a dictionary, e.g.
(python)
res_train = {
'y true': [],
'y pred': []
}
$\color{Green}{\textsf{Answer}}$: Below are the parity plots for the KRR model and the two dummy models (mean and median). One can see that the parity plots for the dummy models could not be plotted like the KRR model due to the division by zero, causing an error. However, it is still possible to draw their shape with a linear curve. Thus, by comparing the plots, one can see that all the models fit the data badly. Nevertheless, the KRR model captures the trend slightly better, as its points are closer to the y = x curve, but it is still not ideal.
Click here for hints for plotting
- If you want to use matplotlib to make the parity plots, you can use the hist2d function
- To create the frequencies and the edges of a histogram, one can use
np.histogram
# True targets and KRR predictions for the parity plots, one dict per split.
res_train = {
    'y true': df_train_stratified[TARGET],
    'y pred': krr.predict(df_train_stratified[FEATURES]),
}
res_test = {
    'y true': df_test_stratified[TARGET],
    'y pred': krr.predict(df_test_stratified[FEATURES]),
}
Now, lets calculate the errors
# Signed residuals (true minus predicted) on both splits.
for res in (res_train, res_test):
    res["error"] = res["y true"] - res["y pred"]
Now, plot the parity plots and error distributions
Click here for hints for plotting
If you want interactive plots, you can use the following code:
hv.extension("bokeh")
hex_train = hv.HexTiles(res_train, ["y true", "y pred"]).hist(
dimension=["y true", "y pred"]
)
hex_test = hv.HexTiles(res_test, ["y true", "y pred"]).hist(
dimension=["y true", "y pred"]
)
hex_train + hex_test
# Interactive hex-binned parity plots with marginal histograms.
import holoviews as hv

hv.extension('bokeh')
hex_train = hv.HexTiles(res_train, ['y true', 'y pred']).hist(dimension=['y true', 'y pred'])
hex_test = hv.HexTiles(res_test, ['y true', 'y pred']).hist(dimension=['y true', 'y pred'])

# Dashed red y = x reference line: a perfect model lands every point on it.
diag = np.linspace(0, 35, 1000)
line_curve = hv.Curve((diag, diag), 'x', 'y').opts(color='red', line_dash='dashed')
hex_train * line_curve + hex_test * line_curve
# True vs. predicted values for the mean dummy regressor (train and test splits).
res_train_dummy_mean = {
    'y true': df_train_stratified[TARGET],
    'y pred': dummyregressor_mean.predict(df_train_stratified[FEATURES]),
}
res_test_dummy_mean = {
    'y true': df_test_stratified[TARGET],
    'y pred': dummyregressor_mean.predict(df_test_stratified[FEATURES]),
}
# Residuals for both splits.
for _res in (res_train_dummy_mean, res_test_dummy_mean):
    _res['error'] = _res['y true'] - _res['y pred']
# plot it: hex parity plots for the mean dummy model plus a red reference line
hv.extension('bokeh')
dims = ['y true', 'y pred']
hex_train_dummy_mean = hv.HexTiles(res_train_dummy_mean, dims).hist(dimension=dims)
hex_test_dummy_mean = hv.HexTiles(res_test_dummy_mean, dims).hist(dimension=dims)
# NOTE(review): plotting predictions against true values; for a constant
# predictor this traces a horizontal line.
line_mean_train = hv.Curve((res_train_dummy_mean['y true'], res_train_dummy_mean['y pred']), 'x', 'y').opts(color='red')
line_mean_test = hv.Curve((res_test_dummy_mean['y true'], res_test_dummy_mean['y pred']), 'x', 'y').opts(color='red')
hex_train_dummy_mean * line_mean_train + hex_test_dummy_mean * line_mean_test
/Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:145: RuntimeWarning: divide by zero encountered in divide x = x / size * (aspect_scale if orientation == "pointytop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:145: RuntimeWarning: invalid value encountered in multiply x = x / size * (aspect_scale if orientation == "pointytop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:146: RuntimeWarning: divide by zero encountered in divide y = -y / size / (aspect_scale if orientation == "flattop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:251: RuntimeWarning: invalid value encountered in cast return q.astype(int), r.astype(int) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:145: RuntimeWarning: divide by zero encountered in divide x = x / size * (aspect_scale if orientation == "pointytop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:145: RuntimeWarning: invalid value encountered in multiply x = x / size * (aspect_scale if orientation == "pointytop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:146: RuntimeWarning: divide by zero encountered in divide y = -y / size / (aspect_scale if orientation == "flattop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:251: RuntimeWarning: invalid value encountered in cast return q.astype(int), r.astype(int)
# True vs. predicted values for the median dummy regressor (train and test splits).
res_train_dummy_median = {
    'y true': df_train_stratified[TARGET],
    'y pred': dummyregressor_median.predict(df_train_stratified[FEATURES]),
}
res_test_dummy_median = {
    'y true': df_test_stratified[TARGET],
    'y pred': dummyregressor_median.predict(df_test_stratified[FEATURES]),
}
# Residuals for both splits.
for _res in (res_train_dummy_median, res_test_dummy_median):
    _res['error'] = _res['y true'] - _res['y pred']
# plot it: hex parity plots for the median dummy model plus a red reference line
hv.extension('bokeh')
_dims = ['y true', 'y pred']
hex_train_dummy_median = hv.HexTiles(res_train_dummy_median, _dims).hist(dimension=_dims)
hex_test_dummy_median = hv.HexTiles(res_test_dummy_median, _dims).hist(dimension=_dims)
# NOTE(review): plotting predictions against true values; for a constant
# predictor this traces a horizontal line.
line_median_train = hv.Curve((res_train_dummy_median['y true'], res_train_dummy_median['y pred']), 'x', 'y').opts(color='red')
line_median_test = hv.Curve((res_test_dummy_median['y true'], res_test_dummy_median['y pred']), 'x', 'y').opts(color='red')
hex_train_dummy_median * line_median_train + hex_test_dummy_median * line_median_test
/Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:145: RuntimeWarning: divide by zero encountered in divide x = x / size * (aspect_scale if orientation == "pointytop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:145: RuntimeWarning: invalid value encountered in multiply x = x / size * (aspect_scale if orientation == "pointytop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:146: RuntimeWarning: divide by zero encountered in divide y = -y / size / (aspect_scale if orientation == "flattop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:251: RuntimeWarning: invalid value encountered in cast return q.astype(int), r.astype(int) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:145: RuntimeWarning: divide by zero encountered in divide x = x / size * (aspect_scale if orientation == "pointytop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:145: RuntimeWarning: invalid value encountered in multiply x = x / size * (aspect_scale if orientation == "pointytop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:146: RuntimeWarning: divide by zero encountered in divide y = -y / size / (aspect_scale if orientation == "flattop" else 1) /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/bokeh/util/hex.py:251: RuntimeWarning: invalid value encountered in cast return q.astype(int), r.astype(int)
7. Improve the model¶
Our training set still has a couple of issues you might have noticed:
- The feature values are not scaled (different features are measured in different units ...)
- Some features are basically constant, i.e. do not contain relevant information and just increase the dimensionality of the problem
- Some feature distributions are skewed (which is more relevant for some models than for others ...)
$\color{DarkBlue}{\textsf{Short Question}}$
- Why might the scaling of the features be relevant for a machine-learning model?
$\color{Green}{\textsf{Answer}}$: In machine learning, scaling features is crucial for standardizing the data across each variable, making them comparable. When features are on different scales, those with larger magnitudes can disproportionately influence the model's performance, leading to biased results. By scaling the features, we ensure that each one contributes equally to the training process. This uniformity allows the model to learn more effectively, as it can accurately capture relationships between features without being skewed by their varying scales. Overall, proper scaling enhances the model's convergence speed and improves its predictive accuracy.
7.1. Standard scaling and building a first pipeline¶
Given that we will now go beyond training a single model, we will build Pipelines, which are objects that can collect a selection of transformations and estimators. This makes it quite easy to apply the same set of operations to different datasets. A simple pipeline might be built as follows

$\color{DarkBlue}{\textsf{Short Exercise}}$
- Build a pipeline that first performs standard scaling and then fits a KRR. Call it
pipe_w_scaling. - Fit it on the training set
- Make predictions, calculate the errors and make the parity plots
Click here for hints
- the
fit,predictmethods also work for pipelines
# Pipeline: standard-scale the features, then fit an RBF-kernel KRR.
hv.extension('bokeh')
pipe_w_scaling = Pipeline(
    [
        ('scaling', StandardScaler()),
        ('krr', KernelRidge(kernel='rbf'))
    ]
)
# Fit the pipeline on the training set
pipe_w_scaling.fit(df_train_stratified[FEATURES], df_train_stratified[TARGET])
# Collect predictions and residuals for the parity plots
res_train = {
    'y true': df_train_stratified[TARGET],
    'y pred': pipe_w_scaling.predict(df_train_stratified[FEATURES])
}
res_test = {
    'y true': df_test_stratified[TARGET],
    'y pred': pipe_w_scaling.predict(df_test_stratified[FEATURES])
}
res_train["error"] = res_train["y true"] - res_train["y pred"]
res_test["error"] = res_test["y true"] - res_test["y pred"]
# Create parity plots
hex_train = hv.HexTiles(res_train, ['y true', 'y pred']).hist(dimension=['y true', 'y pred'])
hex_test = hv.HexTiles(res_test, ['y true', 'y pred']).hist(dimension=['y true', 'y pred'])
# Derive the y = x reference range from the data instead of hard-coding 0..35
# (the original span is kept as a floor, so behavior is backward compatible).
lo = min(np.min(res_train['y true']), np.min(res_test['y true']), 0)
hi = max(np.max(res_train['y true']), np.max(res_test['y true']), 35)
x = np.linspace(lo, hi, 1000)
line_curve = hv.Curve((x, x), 'x', 'y').opts(color='red', line_dash='dashed')
hex_train * line_curve + hex_test * line_curve  # show the combined plot
7.2. Hyperparameter optimization¶
A key component we did not optimize so far are hyperparameters. Those are parameters of the model that we usually cannot learn from the data but have to fix before we train the model. Since we cannot learn those parameters it is not trivial to select them. Hence, what we typically do in practice is to create another set, a "validation set", and use it to test models trained with different hyperparameters.
The most common approach to hyperparameter optimization is to define a grid of all relevant parameters and to search over the grid for the best model performance.
$\color{DarkBlue}{\textsf{Short Exercise}}$
- Think about which parameters you could optimize in the pipeline. Note that your KRR model has two parameters you can optimize. You can also switch off some steps by setting them to `None'.
- For each parameter you need to define a reasonable grid to search over.
- Recall what k-fold cross-validation does. Run the hyperparameter optimization using 5-fold cross-validation (you can adjust the number of folds according to your computational resources/impatience. It turns out that k=10 is the best tradeoff between variance and bias). Tune the hyperparameters until you are satisfied (e.g., until you cannot improve the cross-validated error any more)
- Why don't we use the test set for hyperparameter tuning but instead test on the validation set?
Answer: The test set needs to stay completely unseen by the model during training so it gives an honest evaluation of performance at the end. If we use the test set for tuning, the model will start to "learn" from it, making the performance results unreliable. Instead, we use the validation set to adjust the model without exposing it to the test data.
- Evaluate the model performance by calculating the performance metrics (MAE, MSE, max error) on the training and the test set.
- Optional: Instead of grid search, try to use random search on the same grid (
RandomizedSearchCV) and fix the number of evaluations (n_iter) to a fraction of the number of evaluations of grid search. What do you observe and conclude?
$\color{DarkRed}{\textsf{Tips}}$
If you want to see what is happening, set the
verbosityargument of theGridSearchCVobject to a higher number.If you want to speed up the optimization, you can run it in parallel by setting the
n_jobs argument to the number of workers. If you set it to -1 it will use all available cores. Using all cores might freeze your computer if you do not have enough memory. If the optimization is too slow, reduce the number of data points in your set, the number of folds or the grid size. Note that it can also be a feasible strategy to first use a coarser grid and then a finer grid for fine-tuning.
For grid search, you need to define a parameter grid, which is a dictionary of the following form:
(python)
param_grid = {
'pipelinestage__parameter': np.logspace(-4,1,10),
'pipelinestage': [None, TransformerA(), TransformerB()]
}
After the search, you can access the best model with
.best_estimator_and the best parameters with.best_params_on the GridSearchCV instance. For examplegrid_krr.best_estimator_If you initialize the GridSearchCV instance with
refit=Trueit will automatically train the model with all training data (and not only the training folds from cross-validations)
The double underscore (dunder) notation works recursively and specifies the parameters for any pipeline stage.
For example, ovasvm__estimator__cls__C would specify the C parameter of the estimator in the one-versus-rest classifier ovasvm.
You can print all parameters of the pipeline using print(sorted(pipeline.get_params().keys()))
Click here for hints about pipelines and grid search
- You can use the
np.logspacefunction to generate a grid for values that you want to vary on a logarithmic scale - There are two hyperparameters for KRR: the regularization strength
alphaand the Gaussian widthgamma - For the regularization strength, values between 1 and 1e-3 can be reasonable. For gamma you can use the median heuristic, gamma = 1 / median, or values between 1e-3 and 1e3
# Build a scaling + KRR pipeline and exhaustively search its hyperparameters.
pipe_w_scaling_hyp = Pipeline(
    [
        ('scaling', StandardScaler()),
        ('krr', KernelRidge(kernel='rbf')),
    ]
)
# The scaler itself is treated as a hyperparameter, alongside the KRR
# regularization strength (alpha) and the RBF kernel width (gamma).
param_grid = {
    'scaling': [StandardScaler(), MinMaxScaler(), RobustScaler()], #test different scaling methods
    'krr__alpha': [0.00076, 0.00077],
    'krr__gamma': [0.15, 0.17, 0.2],
}
grid_krr = GridSearchCV(
    pipe_w_scaling_hyp,
    param_grid=param_grid,
    cv=10,        # 10-fold cross-validation
    verbose=10,   # log progress of every fit
    n_jobs=-1,    # use all available cores
    refit=True,   # refit the best model on the full training set
)
# optional random search
#random_krr = RandomizedSearchCV(#your pipeline, param_distributions=param_grid, n_iter=#number of evaluations,
#                                cv=#number of folds, verbose=2, n_jobs=2)
# run the grid search by calling the fit method
grid_krr.fit(df_train_stratified[FEATURES], df_train_stratified[TARGET])
# optional random search
# random_krr.fit(#fillme)
Fitting 10 folds for each of 18 candidates, totalling 180 fits [CV 4/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 7/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 2/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 6/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 3/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 1/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 5/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 8/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 1/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.473 total time= 1.2min [CV 9/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 5/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.461 total time= 2.9min [CV 10/10; 1/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler() [CV 10/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.436 total time= 1.0min [CV 1/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 2/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.483 total time= 6.6min [CV 8/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.446 total time= 6.6min [CV 2/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 3/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 4/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.489 total time= 6.6min [CV 3/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.437 total time= 6.6min [CV 6/10; 1/18] END krr__alpha=0.00076, 
krr__gamma=0.15, scaling=StandardScaler();, score=0.466 total time= 6.6min [CV 7/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.403 total time= 6.6min [CV 9/10; 1/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=StandardScaler();, score=0.452 total time= 5.4min [CV 4/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 6/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 5/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 7/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 8/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 1/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.984 total time= 2.7min [CV 9/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 9/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.982 total time= 46.9s [CV 10/10; 2/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler() [CV 8/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.980 total time= 2.9min [CV 1/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 10/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.980 total time= 3.0min [CV 2/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 2/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.981 total time= 4.7min [CV 3/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 3/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.979 total time= 4.8min [CV 4/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 2/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.205 total time= 1.9min 
[CV 5/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 1/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.283 total time= 4.2min[CV 7/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.979 total time= 7.1min [CV 6/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 7/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 4/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.254 total time= 2.4min [CV 8/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 3/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.157 total time= 3.2min [CV 9/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 4/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.982 total time= 9.1min [CV 10/10; 3/18] START krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler() [CV 5/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.973 total time= 9.1min [CV 8/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.221 total time= 1.8min [CV 2/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 1/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 5/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.251 total time= 4.2min [CV 6/10; 2/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.980 total time= 9.9min [CV 3/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 4/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 9/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.235 total time= 3.9min [CV 6/10; 3/18] END krr__alpha=0.00076, 
krr__gamma=0.15, scaling=RobustScaler();, score=0.226 total time= 4.7min [CV 5/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 6/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 7/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.149 total time= 5.5min [CV 7/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 2/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.423 total time= 5.6min [CV 8/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 7/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.347 total time= 2.0min [CV 9/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 1/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.426 total time= 6.2min [CV 10/10; 4/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler() [CV 10/10; 3/18] END krr__alpha=0.00076, krr__gamma=0.15, scaling=RobustScaler();, score=0.184 total time= 6.3min [CV 1/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 3/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.379 total time= 6.0min[CV 4/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.431 total time= 6.0min [CV 2/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 3/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 6/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.407 total time= 4.8min [CV 4/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 5/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.413 total time= 5.5min [CV 5/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, 
scaling=MinMaxScaler() [CV 5/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.973 total time= 1.4min [CV 6/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 1/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.984 total time= 3.9min [CV 7/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 6/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 1.2min [CV 8/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 8/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.397 total time= 5.4min [CV 9/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.399 total time= 5.3min [CV 9/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 10/10; 5/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler() [CV 10/10; 4/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=StandardScaler();, score=0.375 total time= 6.0min [CV 1/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 2/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.981 total time= 6.3min [CV 2/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 4/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.982 total time= 6.1min [CV 1/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.223 total time= 1.5min [CV 3/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 6.9min [CV 3/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 5/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 4/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 2/10; 6/18] 
END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.134 total time= 1.5min [CV 6/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 7/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 4.6min [CV 7/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 6/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.158 total time= 1.1min [CV 7/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.080 total time= 51.3s [CV 8/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 9/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 8/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.153 total time= 1.2min [CV 10/10; 6/18] START krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler() [CV 9/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.170 total time= 1.3min [CV 1/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 1/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.359 total time= 50.5s [CV 2/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 9/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.983 total time= 7.2min [CV 3/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 8/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 7.8min [CV 10/10; 5/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 7.7min [CV 4/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 5/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 10/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, 
scaling=RobustScaler();, score=0.108 total time= 2.2min [CV 6/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 4/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.181 total time= 6.6min [CV 7/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 3/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.085 total time= 6.6min [CV 8/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 5/10; 6/18] END krr__alpha=0.00076, krr__gamma=0.17, scaling=RobustScaler();, score=0.188 total time= 6.9min [CV 9/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 2/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.338 total time= 3.2min [CV 10/10; 7/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler() [CV 9/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.324 total time= 1.7min [CV 1/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 3/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.299 total time= 4.3min [CV 2/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 10/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.288 total time= 2.2min [CV 3/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 4/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.349 total time= 4.8min [CV 4/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 6/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.324 total time= 4.3min [CV 5/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 5/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.343 
total time= 4.8min [CV 6/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 7/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.267 total time= 4.0min [CV 7/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 8/10; 7/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=StandardScaler();, score=0.327 total time= 4.5min [CV 3/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.981 total time= 1.5min [CV 8/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 9/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 7/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.980 total time= 1.6min [CV 10/10; 8/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler() [CV 1/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.985 total time= 3.9min [CV 2/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.981 total time= 3.7min [CV 1/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 8/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.979 total time= 1.4min [CV 2/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 3/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 6/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.980 total time= 3.8min [CV 10/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.979 total time= 1.2min [CV 4/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 5/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 9/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.983 total time= 2.9min [CV 6/10; 9/18] START krr__alpha=0.00076, 
krr__gamma=0.2, scaling=RobustScaler() [CV 5/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.972 total time= 4.5min [CV 7/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 4/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=0.076 total time= 2.3min [CV 8/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 4/10; 8/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.982 total time= 6.9min [CV 9/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 8/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=0.058 total time= 1.4min [CV 10/10; 9/18] START krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler() [CV 1/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=0.136 total time= 5.1min [CV 2/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=0.034 total time= 5.1min [CV 1/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 2/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 1/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.473 total time= 1.4min [CV 3/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 7/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=-0.018 total time= 5.1min [CV 4/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 3/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=-0.017 total time= 7.2min [CV 5/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 6/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=0.063 total time= 6.2min [CV 5/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, 
score=0.098 total time= 6.8min [CV 6/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 7/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 5/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.461 total time= 31.3s [CV 8/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 8/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.446 total time= 53.7s [CV 3/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.437 total time= 2.1min [CV 9/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 10/10; 10/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler() [CV 2/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.483 total time= 4.6min [CV 1/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 10/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.436 total time= 1.0min [CV 2/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 10/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=0.005 total time= 6.2min [CV 3/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 1/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.984 total time= 1.6min [CV 4/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 2/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.981 total time= 2.4min [CV 9/10; 9/18] END krr__alpha=0.00076, krr__gamma=0.2, scaling=RobustScaler();, score=0.076 total time= 8.2min [CV 5/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 6/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 9/10; 
10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.452 total time= 4.9min [CV 7/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 4/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.982 total time= 2.7min [CV 8/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 3/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.979 total time= 3.8min [CV 9/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 6/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.466 total time= 7.8min [CV 10/10; 11/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler() [CV 7/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.403 total time= 8.1min [CV 1/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 4/10; 10/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=StandardScaler();, score=0.489 total time= 9.4min [CV 2/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 7/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.979 total time= 3.2min [CV 1/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.283 total time= 58.8s [CV 3/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 4/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 2/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.205 total time= 35.5s [CV 5/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 5/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.251 total time= 41.1s [CV 6/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 9/10; 11/18] END 
krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.982 total time= 3.0min [CV 6/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.980 total time= 5.6min [CV 7/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 8/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 3/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.157 total time= 2.0min [CV 9/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 6/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.226 total time= 1.4min [CV 7/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.149 total time= 1.2min [CV 10/10; 12/18] START krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler() [CV 1/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 5/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.973 total time= 7.4min [CV 2/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 2/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.423 total time= 1.4min [CV 3/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 8/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.980 total time= 7.0min [CV 4/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 4/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.254 total time= 4.6min [CV 5/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 10/10; 11/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=MinMaxScaler();, score=0.980 total time= 6.3min [CV 9/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.235 total time= 3.0min [CV 
6/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 7/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 5/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.413 total time= 48.4s [CV 8/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 8/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.397 total time= 1.3min [CV 9/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 8/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.221 total time= 6.7min [CV 10/10; 13/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler() [CV 9/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.399 total time= 1.2min [CV 1/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 1/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.426 total time= 6.6min [CV 2/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 10/10; 12/18] END krr__alpha=0.00077, krr__gamma=0.15, scaling=RobustScaler();, score=0.184 total time= 6.9min [CV 10/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.375 total time= 1.4min [CV 3/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 4/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 2/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.981 total time= 1.4min [CV 5/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 4/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.431 total time= 6.3min [CV 3/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.379 total time= 6.3min [CV 
6/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 7/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 5/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.973 total time= 35.7s [CV 8/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 7/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.347 total time= 6.4min [CV 9/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 6/10; 13/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=StandardScaler();, score=0.407 total time= 6.6min [CV 10/10; 14/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler() [CV 8/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 53.0s [CV 1/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 9/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.983 total time= 1.1min [CV 2/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 1/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.223 total time= 1.2min [CV 3/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 1/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.984 total time= 6.0min [CV 4/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 3/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 5.2min [CV 4/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.982 total time= 5.2min [CV 5/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 6/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 3/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, 
scaling=RobustScaler();, score=0.085 total time= 1.9min [CV 7/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 4/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.181 total time= 1.7min [CV 8/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 7/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.080 total time= 50.5s [CV 9/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 6/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 6.2min [CV 10/10; 15/18] START krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler() [CV 2/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.134 total time= 4.6min [CV 10/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 5.6min [CV 1/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 2/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 7/10; 14/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=MinMaxScaler();, score=0.980 total time= 6.8min [CV 3/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 6/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.158 total time= 3.0min [CV 4/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 9/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.170 total time= 2.7min [CV 5/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 10/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.108 total time= 2.5min [CV 6/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 4/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, 
scaling=StandardScaler();, score=0.349 total time= 3.4min [CV 7/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 8/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.153 total time= 5.6min [CV 8/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 6/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler();, score=0.324 total time= 3.1min [CV 9/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 3/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler();, score=0.299 total time= 6.0min [CV 10/10; 16/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler() [CV 5/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler();, score=0.343 total time= 4.9min [CV 1/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 1/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.985 total time= 1.1min [CV 2/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 1/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler();, score=0.359 total time= 7.5min [CV 3/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 7/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler();, score=0.267 total time= 4.8min [CV 5/10; 15/18] END krr__alpha=0.00077, krr__gamma=0.17, scaling=RobustScaler();, score=0.188 total time=11.2min [CV 4/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 5/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 2/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler();, score=0.338 total time= 9.0min [CV 6/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 10/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, 
scaling=StandardScaler();, score=0.288 total time= 3.1min [CV 7/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 8/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler();, score=0.327 total time= 5.3min [CV 8/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 6/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.980 total time= 1.3min [CV 9/10; 16/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=StandardScaler();, score=0.324 total time= 5.3min [CV 9/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 10/10; 17/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler() [CV 8/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.979 total time= 2.2min [CV 1/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 1/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=0.136 total time= 1.2min [CV 2/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 7/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.980 total time= 4.0min [CV 3/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 2/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.981 total time= 6.5min [CV 3/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.981 total time= 6.5min [CV 4/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 5/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 4/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.982 total time= 6.5min [CV 6/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 3/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=-0.017 
total time= 1.8min [CV 7/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 5/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.972 total time= 7.7min [CV 8/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 9/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.983 total time= 5.9min [CV 9/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 8/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=0.058 total time= 1.1min [CV 10/10; 18/18] START krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler() [CV 9/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=0.076 total time= 59.5s [CV 10/10; 17/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=MinMaxScaler();, score=0.979 total time= 7.4min [CV 6/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=0.063 total time= 2.9min [CV 2/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=0.034 total time= 5.1min [CV 5/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=0.098 total time= 4.0min [CV 4/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=0.076 total time= 4.1min [CV 7/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=-0.018 total time= 3.1min [CV 10/10; 18/18] END krr__alpha=0.00077, krr__gamma=0.2, scaling=RobustScaler();, score=0.005 total time= 1.0min
GridSearchCV(cv=10,
estimator=Pipeline(steps=[('scaling', StandardScaler()),
('krr', KernelRidge(kernel='rbf'))]),
n_jobs=-1,
param_grid={'krr__alpha': [0.00076, 0.00077],
'krr__gamma': [0.15, 0.17, 0.2],
'scaling': [StandardScaler(), MinMaxScaler(),
RobustScaler()]},
verbose=10)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10,
estimator=Pipeline(steps=[('scaling', StandardScaler()),
('krr', KernelRidge(kernel='rbf'))]),
n_jobs=-1,
param_grid={'krr__alpha': [0.00076, 0.00077],
'krr__gamma': [0.15, 0.17, 0.2],
'scaling': [StandardScaler(), MinMaxScaler(),
RobustScaler()]},
verbose=10)Pipeline(steps=[('scaling', MinMaxScaler()),
('krr', KernelRidge(alpha=0.00076, gamma=0.17, kernel='rbf'))])MinMaxScaler()
KernelRidge(alpha=0.00076, gamma=0.17, kernel='rbf')
# Pull out the refitted best pipeline found by the grid search.
best_krr_model_scaling_hyp = grid_krr.best_estimator_
# Report regression metrics on the held-out test set first, then on the training set.
for frame in (df_test_stratified, df_train_stratified):
    print(get_regression_metrics(best_krr_model_scaling_hyp, frame[FEATURES], frame[TARGET]))
print(grid_krr.best_params_)
# Collect true values, predictions, and residuals for parity plots.
def _parity_results(frame):
    """Return a dict with 'y true', 'y pred', and 'error' for one data frame."""
    y_true = frame[TARGET]
    y_pred = best_krr_model_scaling_hyp.predict(frame[FEATURES])
    return {'y true': y_true, 'y pred': y_pred, 'error': y_true - y_pred}

res_train_scaling_hyp = _parity_results(df_train_stratified)
res_test_scaling_hyp = _parity_results(df_test_stratified)
# Hex-bin parity plots (train and test) with marginal histograms and a y = x guide line.
hv.extension('bokeh')
parity_dims = ['y true', 'y pred']
hex_train = hv.HexTiles(res_train_scaling_hyp, parity_dims).hist(dimension=parity_dims)
hex_test = hv.HexTiles(res_test_scaling_hyp, parity_dims).hist(dimension=parity_dims)
# Dashed red diagonal marks perfect prediction.
diag = np.linspace(0, 35, 1000)
line_curve = hv.Curve((diag, diag), 'x', 'y').opts(color='red', line_dash='dashed')
hex_train * line_curve + hex_test * line_curve
{'mae': np.float64(0.6803943568962254), 'mse': np.float64(1.0667722353675806), 'max_error': np.float64(8.809048368262342)}
{'mae': np.float64(0.5419018068307553), 'mse': np.float64(0.634113231092651), 'max_error': np.float64(6.0766987177234135)}
{'krr__alpha': 0.00076, 'krr__gamma': 0.17, 'scaling': MinMaxScaler()}
Click here for some more information about hyperparameter optimization
Grid search is not the most efficient way to perform hyperparameter optimization — even random search has been shown to be more efficient. Truly efficient, however, are Bayesian optimization approaches such as TPE. This is implemented in the hyperopt library, which is also installed in your conda environment.Click here for hyperparameter optimization with hyperopt (advanced and optional outlook)
Import the tools we need
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, mix, rand, anneal, space_eval
from functools import partial
Define the grid
# Log-uniform priors over [1e-3, 10] for both KRR hyperparameters;
# the hyperopt label must match the dict key so fmin can report results.
param_hyperopt = {
    name: hp.loguniform(name, np.log(0.001), np.log(10))
    for name in ("krr__alpha", "krr__gamma")
}
Define the objective function
def objective_function(params):
    """Hyperopt objective: mean 10-fold CV MAE of the KRR pipeline.

    Sets the sampled alpha/gamma on the shared ``pipe`` object, scores it
    with negative MAE, and returns the (positive) loss hyperopt minimizes.
    """
    hyper = {
        "krr__alpha": params["krr__alpha"],
        "krr__gamma": params["krr__gamma"],
    }
    pipe.set_params(**hyper)
    cv_score = cross_val_score(
        pipe, X_train, y_train, cv=10, scoring="neg_mean_absolute_error"
    ).mean()
    # neg_mean_absolute_error is negated so that larger is better; flip sign for a loss.
    return {"loss": -cv_score, "status": STATUS_OK}
We will use a search in which we mix random search, annealing and tpe
# Bookkeeping object that records every evaluated point.
trials = Trials()
# Suggestion algorithm: 15% pure random, 15% simulated annealing, 70% TPE.
mix_search = partial(
    mix.suggest,
    p_suggest=[
        (0.15, rand.suggest),
        (0.15, anneal.suggest),
        (0.70, tpe.suggest),
    ],
)
Now, we can minimize the objective function.
# Minimize the CV-MAE objective over the search space with the mixed strategy.
# NOTE(review): recent hyperopt releases expect rstate=np.random.default_rng(...);
# RandomState only works on older versions — confirm against the installed hyperopt.
best_param = fmin(
    fn=objective_function,
    space=param_hyperopt,
    algo=mix_search,
    max_evals=MAX_EVALES,  # budget constant defined elsewhere in the notebook
    trials=trials,
    rstate=np.random.RandomState(RANDOM_SEED),
)
8. Feature Engineering¶
Finally, we would like to remove features with low variance. This can be done by setting a variance threshold.
$\color{DarkBlue}{\textsf{Short Question}}$
- What is the reasoning behind doing this?
Answer: Setting a variance threshold reduces model complexity by removing low-variance features, which often carry little useful information. This can help prevent overfitting, especially if low-variance features mainly capture noise.
- When might it go wrong and why?
Answer: If the threshold is too high, important features may be removed, potentially losing valuable information. Also, if low-variance features have meaningful correlations with other features, removing them could weaken the model's understanding of the data.
$\color{DarkBlue}{\textsf{Short Exercise}}$
- Add a variance threshold to the pipeline (select the correct function argument)
- Use random search for hyperparameter optimization, retrain the pipeline, and calculate the performance metrics (max error, MAE, MSE) on the training and test set
- If you could improve the predictive performance, do not forget to also run the model for the Kaggle competition!
# Pipeline: drop near-constant features, scale, then fit rbf-kernel ridge.
# (Step name 'variance_treshold' [sic] is kept — the param grid refers to it.)
pipe_variance_threshold = Pipeline(
    steps=[
        ('variance_treshold', VarianceThreshold(threshold=0.01)),
        ('scaling', StandardScaler()),
        ('krr', KernelRidge(kernel='rbf')),
    ]
)
# Search space for the randomized search over scaler choice, KRR hyperparameters,
# and the variance-threshold cutoff.
# NOTE(review): 10e-9 == 1e-8 and 10e-6 == 1e-5; confirm 1e-9 / 1e-6 weren't intended.
param_grid_variance_threshold = {
    'scaling': [StandardScaler(), MinMaxScaler()],
    'krr__alpha': [1e-4, 1e-3, 1e-2, 1e-1, 1],
    'krr__gamma': [1e-3, 1e-2, 1e-1, 1, 10, 100],
    'variance_treshold__threshold': [0, 1e-8, 1e-5, 1e-3, 1e-2, 1e-1, 1, 10, 100],
}
# Randomized search: 30 sampled configurations, each scored with 10-fold CV.
# NOTE(review): no random_state is passed, so the sampled candidates differ
# between runs — confirm whether reproducibility matters here.
random_variance_treshold = RandomizedSearchCV(
    pipe_variance_threshold,
    param_distributions=param_grid_variance_threshold,
    n_iter=30,
    cv=10,
    verbose=10,
    n_jobs=-1,
)
# Fit on the stratified training split and run the cross-validated evaluation.
random_variance_treshold.fit(df_train_stratified[FEATURES], df_train_stratified[TARGET])
Fitting 10 folds for each of 30 candidates, totalling 300 fits
python(8506) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(8507) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(8508) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(8509) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(8510) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(8511) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(8512) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(8513) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
[CV 8/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100[CV 1/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 6/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 7/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 5/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 3/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 4/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 2/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 1/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.334 total time= 3.6min [CV 9/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 8/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.367 total time= 4.3min [CV 10/10; 1/30] START krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 9/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.295 total time= 59.2s [CV 1/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 7/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.310 total time= 4.7min [CV 2/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 3/10; 1/30] END krr__alpha=0.01, krr__gamma=10, 
scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.483 total time= 4.8min [CV 4/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.265 total time= 4.8min [CV 3/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 4/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 5/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.269 total time= 4.8min [CV 2/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.350 total time= 4.8min [CV 6/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.372 total time= 4.8min [CV 5/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 6/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 7/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 10/10; 1/30] END krr__alpha=0.01, krr__gamma=10, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-2.345 total time= 1.2min [CV 8/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 1/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.053 total time= 1.9min [CV 9/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01 [CV 5/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.083 total time= 5.0min [CV 10/10; 2/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), 
variance_treshold__threshold=0.01 [CV 9/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.131 total time= 4.0min [CV 4/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.119 total time= 5.8min [CV 1/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 2/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 2/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.113 total time= 6.6min [CV 3/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 2/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.959 total time= 2.0min [CV 4/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 10/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.117 total time= 3.5min [CV 5/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 8/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.063 total time= 8.3min [CV 3/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.243 total time= 9.0min [CV 6/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 7/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 6/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, 
score=-1.171 total time= 9.7min [CV 8/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 4/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.960 total time= 2.4min [CV 9/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 1/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.969 total time= 4.5min [CV 10/10; 3/30] START krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 8/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.947 total time= 1.9min [CV 7/10; 2/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=0.01;, score=-1.164 total time=11.7min [CV 1/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 2/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 3/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.955 total time= 6.2min [CV 5/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.942 total time= 4.2min [CV 3/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 4/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 6/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.959 total time= 5.4min [CV 5/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 10/10; 3/30] END 
krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.945 total time= 4.8min [CV 9/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.961 total time= 4.9min [CV 6/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 7/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 7/10; 3/30] END krr__alpha=0.001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=100;, score=0.968 total time= 6.8min [CV 8/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 8/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.956 total time= 1.6min [CV 9/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 4/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.967 total time= 5.5min [CV 1/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.975 total time= 6.6min [CV 10/10; 4/30] START krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 1/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 2/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.967 total time= 6.7min [CV 2/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 3/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.970 total time= 5.7min [CV 3/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), 
variance_treshold__threshold=1e-08 [CV 7/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.975 total time= 4.2min [CV 4/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 5/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.953 total time= 5.1min [CV 5/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 9/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.968 total time= 2.1min [CV 6/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 6/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.970 total time= 5.4min [CV 7/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 7/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.971 total time= 3.1min [CV 8/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 10/10; 4/30] END krr__alpha=0.1, krr__gamma=1, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.954 total time= 6.2min [CV 9/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 6/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.973 total time= 4.9min [CV 3/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.973 total time= 6.0min [CV 10/10; 5/30] START krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 1/10; 6/30] 
START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 4/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.973 total time= 6.3min [CV 2/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.973 total time= 7.2min [CV 1/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.975 total time= 7.4min [CV 2/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 3/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 4/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 5/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.970 total time= 6.5min [CV 8/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.973 total time= 2.2min [CV 5/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 6/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 10/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.976 total time= 2.2min [CV 7/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 9/10; 5/30] END krr__alpha=0.01, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.973 total time= 4.2min [CV 8/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 7/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, 
score=0.587 total time= 3.1min [CV 9/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 6/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.650 total time= 4.1min [CV 10/10; 6/30] START krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 10/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.619 total time= 50.8s [CV 1/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 5/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.611 total time= 5.0min [CV 2/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 8/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.603 total time= 2.4min [CV 3/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 3/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.629 total time= 6.2min [CV 4/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 1/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.621 total time= 8.9min [CV 5/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 4/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.672 total time= 8.2min [CV 6/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 2/10; 6/30] END krr__alpha=0.1, 
krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.657 total time= 9.2min [CV 7/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 9/10; 6/30] END krr__alpha=0.1, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.617 total time= 6.1min [CV 8/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 3/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.609 total time= 5.0min [CV 9/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 2/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.648 total time= 6.0min [CV 10/10; 7/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 7/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.565 total time= 2.9min [CV 1/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 4/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.651 total time= 6.0min [CV 2/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 9/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.602 total time= 2.9min [CV 3/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 10/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.604 total time= 2.2min [CV 4/10; 8/30] START krr__alpha=0.1, krr__gamma=10, 
scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 8/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.586 total time= 4.0min [CV 6/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.634 total time= 6.0min [CV 5/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 6/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 1/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.602 total time= 9.6min [CV 7/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 5/10; 7/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.593 total time= 7.4min [CV 8/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 2/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.789 total time= 3.7min [CV 9/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 8/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.725 total time=12.9min [CV 10/10; 8/30] START krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 1/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.759 total time=16.5min [CV 1/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 4/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.788 total time=15.4min [CV 2/10; 9/30] 
START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 9/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.743 total time=13.3min [CV 3/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 10/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.746 total time= 1.2min [CV 4/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 1/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.977 total time= 1.8min [CV 5/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 3/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.773 total time=17.1min [CV 6/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 4/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.968 total time= 2.2min [CV 7/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 7/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.758 total time=16.8min [CV 8/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 2/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.973 total time= 2.7min [CV 9/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 6/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), 
variance_treshold__threshold=10;, score=0.785 total time=17.7min [CV 10/10; 9/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 5/10; 8/30] END krr__alpha=0.1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.716 total time=18.2min [CV 1/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 3/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.964 total time= 5.0min [CV 2/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 5/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.963 total time= 4.8min [CV 6/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.973 total time= 4.8min [CV 3/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 4/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 9/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.974 total time= 4.3min [CV 5/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 8/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.969 total time= 5.5min [CV 6/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 1/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.053 total time= 5.2min [CV 7/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), 
variance_treshold__threshold=1e-08 [CV 10/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.973 total time= 5.9min [CV 8/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 6/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.171 total time= 1.4min [CV 9/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 8/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.063 total time= 53.5s [CV 10/10; 10/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08 [CV 9/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.131 total time= 1.3min [CV 1/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 7/10; 9/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.972 total time= 9.0min [CV 2/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 2/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.113 total time= 6.2min [CV 3/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 3/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.243 total time= 5.9min [CV 4/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 5/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), 
variance_treshold__threshold=1e-08;, score=-1.083 total time= 6.1min [CV 5/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 4/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.119 total time= 7.0min [CV 6/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 10/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.117 total time= 3.7min [CV 7/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 1/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.927 total time= 2.9min [CV 8/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 4/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.918 total time= 1.9min [CV 9/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 2/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.925 total time= 2.8min [CV 10/10; 11/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 6/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.924 total time= 1.9min [CV 1/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1 [CV 7/10; 10/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=1e-08;, score=-1.164 total time= 7.4min [CV 2/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), 
variance_treshold__threshold=0.1 [CV 3/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.919 total time= 8.8min [CV 3/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1 [CV 8/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.926 total time= 7.5min [CV 4/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1 [CV 10/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.929 total time= 7.3min [CV 5/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1 [CV 3/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.967 total time= 2.8min [CV 5/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.921 total time=10.1min [CV 6/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1 [CV 7/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1 [CV 9/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.923 total time=10.8min [CV 8/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1 [CV 7/10; 11/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.920 total time=11.4min [CV 1/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.969 total time= 9.8min [CV 9/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), 
variance_treshold__threshold=0.1 [CV 10/10; 12/30] START krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1 [CV 2/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.967 total time=10.2min [CV 4/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.965 total time= 5.1min [CV 1/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 2/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 5/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.961 total time= 5.6min [CV 3/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 7/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.963 total time= 5.4min [CV 4/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 9/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.965 total time= 5.0min [CV 5/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 3/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.645 total time= 3.7min [CV 6/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 6/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.967 total time= 8.3min [CV 8/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.967 total time= 6.7min [CV 7/10; 
13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 8/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 5/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.598 total time= 2.6min [CV 9/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 2/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.675 total time= 6.2min [CV 10/10; 13/30] START krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 10/10; 12/30] END krr__alpha=0.01, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.1;, score=0.968 total time= 8.2min [CV 1/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 1/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.634 total time= 7.7min [CV 2/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 6/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.668 total time= 3.3min[CV 4/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.660 total time= 5.7min [CV 3/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 4/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 8/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.603 total time= 4.4min [CV 5/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 
[CV 7/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.624 total time= 4.5min [CV 6/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 2/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.764 total time= 4.2min [CV 7/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 9/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.619 total time= 6.4min [CV 8/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 10/10; 13/30] END krr__alpha=1, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.620 total time= 6.8min [CV 9/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 1/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.722 total time= 6.3min [CV 5/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.683 total time= 3.6min [CV 10/10; 14/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05 [CV 1/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 3/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.737 total time= 5.3min [CV 2/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 6/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.752 total time= 4.3min [CV 4/10; 14/30] END 
krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.753 total time= 6.0min [CV 3/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 4/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 7/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.714 total time= 3.3min [CV 5/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 8/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.692 total time= 3.6min [CV 6/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 2/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.977 total time= 3.2min [CV 7/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 9/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.713 total time= 6.0min [CV 4/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.977 total time= 5.1min [CV 8/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 9/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 9/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.977 total time= 1.1min [CV 10/10; 15/30] START krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001 [CV 5/10; 15/30] END 
krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.970 total time= 5.0min [CV 1/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.979 total time= 7.2min [CV 1/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 2/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 10/10; 14/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=1e-05;, score=0.712 total time= 8.5min [CV 7/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.974 total time= 5.2min [CV 3/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 4/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 3/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.975 total time= 7.6min [CV 5/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 6/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.976 total time= 5.5min [CV 6/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 8/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.975 total time= 4.9min [CV 7/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 6/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.937 total time= 3.3min [CV 8/10; 16/30] START krr__alpha=1, krr__gamma=0.1, 
scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 10/10; 15/30] END krr__alpha=0.001, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=0.001;, score=0.977 total time= 5.0min [CV 9/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 2/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.937 total time= 5.1min [CV 1/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.940 total time= 5.1min [CV 10/10; 16/30] START krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 1/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 5/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.930 total time= 5.0min [CV 2/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 4/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.930 total time= 5.8min [CV 3/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.931 total time= 5.8min [CV 3/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 4/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 7/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.933 total time= 3.5min [CV 5/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 9/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.934 total time= 2.7min [CV 6/10; 
17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 2/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.952 total time= 2.4min [CV 7/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 8/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.938 total time= 5.1min [CV 8/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 10/10; 16/30] END krr__alpha=1, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.939 total time= 5.3min [CV 9/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 1/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.956 total time= 5.4min [CV 10/10; 17/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 6/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.952 total time= 3.8min [CV 1/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 7/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.948 total time= 2.7min [CV 2/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 8/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.955 total time= 3.7min [CV 3/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 10/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, 
scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.956 total time= 4.0min [CV 3/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.947 total time= 7.5min [CV 5/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.950 total time= 7.5min [CV 4/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 4/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.947 total time= 7.5min [CV 5/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 6/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 7/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 9/10; 17/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.951 total time= 5.5min [CV 8/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 2/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.952 total time= 5.1min [CV 1/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.956 total time= 5.2min [CV 9/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 10/10; 18/30] START krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 7/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.948 total time= 3.3min [CV 1/10; 19/30] START krr__alpha=1, krr__gamma=0.01, 
scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 3/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.947 total time= 5.3min [CV 2/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 5/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.950 total time= 6.3min [CV 3/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 6/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.952 total time= 6.9min [CV 4/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 4/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.947 total time= 7.1min [CV 5/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 10/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.956 total time= 6.5min [CV 6/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 8/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.955 total time= 7.5min [CV 7/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 1/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.907 total time= 6.2min [CV 8/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 3/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, 
score=0.902 total time= 3.3min [CV 9/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 7/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.903 total time= 1.7min [CV 10/10; 19/30] START krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001 [CV 5/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.898 total time= 3.5min [CV 1/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 2/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.907 total time= 6.4min [CV 2/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 9/10; 18/30] END krr__alpha=0.01, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.951 total time= 8.7min [CV 3/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 6/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.906 total time= 3.4min [CV 4/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.896 total time= 4.9min [CV 4/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 5/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 3/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.684 total time= 1.8min [CV 6/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 1/10; 20/30] END krr__alpha=0.001, 
krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.536 total time= 2.2min [CV 7/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 2/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.529 total time= 2.2min [CV 8/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 4/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.472 total time= 1.1min [CV 5/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.469 total time= 1.1min [CV 9/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 10/10; 20/30] START krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 8/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.906 total time= 3.7min [CV 9/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.901 total time= 3.6min [CV 1/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 2/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 10/10; 19/30] END krr__alpha=1, krr__gamma=0.01, scaling=MinMaxScaler(), variance_treshold__threshold=0.001;, score=0.909 total time= 2.6min [CV 3/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 6/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.573 total time= 53.3s [CV 4/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, 
scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 7/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.531 total time= 53.3s [CV 8/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.633 total time= 52.9s [CV 5/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 6/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 9/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.494 total time= 1.0min [CV 10/10; 20/30] END krr__alpha=0.001, krr__gamma=100, scaling=StandardScaler(), variance_treshold__threshold=1;, score=-2.553 total time= 1.0min [CV 7/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 8/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 4/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.982 total time= 2.4min [CV 9/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 3/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.973 total time= 3.8min [CV 8/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.978 total time= 3.0min [CV 10/10; 21/30] START krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 1/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 9/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), 
variance_treshold__threshold=1e-08;, score=0.981 total time= 1.4min [CV 2/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 1/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.984 total time= 7.2min [CV 2/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.980 total time= 7.2min [CV 3/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 4/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 6/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.979 total time= 7.9min [CV 5/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 5/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.970 total time= 8.0min [CV 7/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.980 total time= 7.7min [CV 6/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 7/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 1/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.948 total time= 5.9min [CV 8/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 10/10; 21/30] END krr__alpha=0.0001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.979 total time= 8.8min [CV 2/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), 
variance_treshold__threshold=10;, score=0.941 total time= 8.7min [CV 9/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 10/10; 22/30] START krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10 [CV 4/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.937 total time= 5.5min [CV 1/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 3/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.934 total time= 6.9min [CV 6/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.944 total time= 5.6min [CV 2/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 3/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 5/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.938 total time= 7.0min [CV 7/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.941 total time= 7.0min [CV 4/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 5/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 8/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.941 total time= 8.2min [CV 10/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.947 total time= 5.3min [CV 6/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), 
variance_treshold__threshold=0.1 [CV 7/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 1/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.728 total time= 5.4min [CV 8/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 9/10; 22/30] END krr__alpha=0.0001, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=10;, score=0.941 total time= 6.5min [CV 9/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 5/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.689 total time= 5.2min [CV 10/10; 23/30] START krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1 [CV 4/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.759 total time= 5.3min [CV 2/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.770 total time= 6.7min [CV 1/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 2/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 2/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.904 total time= 1.1min [CV 3/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 3/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.744 total time= 8.3min [CV 4/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 8/10; 23/30] END krr__alpha=0.001, krr__gamma=10, 
scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.698 total time= 5.1min [CV 5/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 6/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.758 total time= 6.8min [CV 7/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.720 total time= 6.8min [CV 6/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 7/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 4/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.893 total time= 4.5min [CV 9/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.718 total time= 7.7min [CV 8/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 9/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 5/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.896 total time= 4.7min [CV 10/10; 24/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0 [CV 1/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.905 total time= 7.3min [CV 1/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 3/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.899 total time= 6.3min [CV 2/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 
[CV 10/10; 23/30] END krr__alpha=0.001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.1;, score=0.718 total time= 8.7min [CV 3/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 7/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.900 total time= 5.2min [CV 4/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 4/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.980 total time= 1.4min [CV 5/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 6/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.903 total time= 7.1min [CV 6/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 2/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.980 total time= 4.7min [CV 7/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 9/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.897 total time= 6.1min [CV 8/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 6/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.979 total time= 2.0min [CV 9/10; 25/30] START krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 10/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.906 total time= 6.9min [CV 10/10; 25/30] START 
krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08 [CV 1/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.982 total time= 7.0min [CV 8/10; 24/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=MinMaxScaler(), variance_treshold__threshold=0;, score=0.903 total time= 8.1min [CV 1/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 2/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 8/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.978 total time= 3.1min [CV 3/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 7/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.978 total time= 4.2min [CV 5/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.975 total time= 5.8min [CV 3/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.977 total time= 7.7min [CV 4/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 6/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 5/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 9/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.981 total time= 4.4min [CV 7/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 1/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), 
variance_treshold__threshold=1;, score=0.957 total time= 4.2min [CV 8/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 10/10; 25/30] END krr__alpha=0.001, krr__gamma=0.1, scaling=MinMaxScaler(), variance_treshold__threshold=1e-08;, score=0.981 total time= 5.4min [CV 9/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 8/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.933 total time= 1.8min [CV 10/10; 26/30] START krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1 [CV 3/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.948 total time= 5.2min [CV 1/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 2/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.952 total time= 7.4min [CV 2/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 5/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.932 total time= 6.4min [CV 3/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 4/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.946 total time= 6.6min [CV 4/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 6/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.952 total time= 7.5min [CV 5/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), 
variance_treshold__threshold=1e-05 [CV 3/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.955 total time= 2.6min [CV 7/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.962 total time= 8.0min [CV 6/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 7/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 10/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.935 total time= 5.1min [CV 8/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 9/10; 26/30] END krr__alpha=0.1, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1;, score=0.950 total time= 7.0min [CV 4/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.953 total time= 3.5min [CV 10/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 9/10; 27/30] START krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 1/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.959 total time= 6.1min [CV 1/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 1/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.965 total time= 1.8min [CV 2/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 8/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), 
variance_treshold__threshold=1e-05;, score=0.957 total time= 3.1min [CV 3/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 2/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.957 total time= 7.0min [CV 4/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 5/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.951 total time= 5.3min [CV 5/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 10/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.958 total time= 3.6min [CV 6/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 9/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.954 total time= 3.9min [CV 7/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 6/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.957 total time= 7.4min [CV 8/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 7/10; 27/30] END krr__alpha=0.1, krr__gamma=0.001, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.954 total time= 7.8min [CV 9/10; 28/30] START krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 5/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.935 total time= 4.8min [CV 10/10; 28/30] START krr__alpha=0.0001, 
krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05 [CV 3/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.949 total time= 5.4min [CV 1/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 4/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.948 total time= 5.3min [CV 2/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.955 total time= 5.4min [CV 2/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 3/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 9/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.951 total time= 49.0s [CV 4/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 7/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.963 total time= 5.0min [CV 5/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 6/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.952 total time= 5.4min [CV 6/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 8/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.938 total time= 5.1min [CV 7/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 2/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, 
scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.295 total time= 4.7min [CV 8/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 1/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.258 total time= 4.8min [CV 5/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.267 total time= 3.3min [CV 4/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.308 total time= 4.7min [CV 9/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 10/10; 29/30] START krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100 [CV 1/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 3/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.369 total time= 5.5min [CV 2/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 6/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.332 total time= 5.0min [CV 10/10; 28/30] END krr__alpha=0.0001, krr__gamma=0.01, scaling=StandardScaler(), variance_treshold__threshold=1e-05;, score=0.938 total time= 6.6min [CV 3/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 4/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 2/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.763 total time= 2.4min [CV 8/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, 
scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.299 total time= 3.3min [CV 5/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 6/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 7/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.386 total time= 4.4min [CV 7/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 9/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.315 total time= 4.2min [CV 10/10; 29/30] END krr__alpha=0.0001, krr__gamma=1, scaling=StandardScaler(), variance_treshold__threshold=100;, score=-0.358 total time= 4.2min [CV 8/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 9/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 1/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.720 total time= 6.1min [CV 10/10; 30/30] START krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01 [CV 3/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.735 total time= 4.9min [CV 4/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.751 total time= 5.6min [CV 6/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.750 total time= 4.5min [CV 8/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.690 total time= 3.5min [CV 9/10; 30/30] END krr__alpha=0.0001, 
krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.711 total time= 3.5min [CV 5/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.682 total time= 4.8min [CV 7/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.712 total time= 4.3min [CV 10/10; 30/30] END krr__alpha=0.0001, krr__gamma=10, scaling=MinMaxScaler(), variance_treshold__threshold=0.01;, score=0.711 total time= 1.8min
RandomizedSearchCV(cv=10,
estimator=Pipeline(steps=[('variance_treshold',
VarianceThreshold(threshold=0.01)),
('scaling', StandardScaler()),
('krr',
KernelRidge(kernel='rbf'))]),
n_iter=30, n_jobs=-1,
param_distributions={'krr__alpha': [0.0001, 0.001, 0.01, 0.1,
1],
'krr__gamma': [0.001, 0.01, 0.1, 1, 10,
100],
'scaling': [StandardScaler(),
MinMaxScaler()],
'variance_treshold__threshold': [0,
1e-08,
1e-05,
0.001,
0.01,
0.1, 1,
10,
100]},
verbose=10)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomizedSearchCV(cv=10,
estimator=Pipeline(steps=[('variance_treshold',
VarianceThreshold(threshold=0.01)),
('scaling', StandardScaler()),
('krr',
KernelRidge(kernel='rbf'))]),
n_iter=30, n_jobs=-1,
param_distributions={'krr__alpha': [0.0001, 0.001, 0.01, 0.1,
1],
'krr__gamma': [0.001, 0.01, 0.1, 1, 10,
100],
'scaling': [StandardScaler(),
MinMaxScaler()],
'variance_treshold__threshold': [0,
1e-08,
1e-05,
0.001,
0.01,
0.1, 1,
10,
100]},
verbose=10)Pipeline(steps=[('variance_treshold', VarianceThreshold(threshold=1e-08)),
('scaling', MinMaxScaler()),
('krr', KernelRidge(alpha=0.001, gamma=0.1, kernel='rbf'))])VarianceThreshold(threshold=1e-08)
MinMaxScaler()
KernelRidge(alpha=0.001, gamma=0.1, kernel='rbf')
# Pull the refitted best pipeline (variance-threshold -> scaler -> KRR)
# from the randomized search above.
best_model_threshold = random_variance_treshold.best_estimator_
# get the performance metrics
# Test-set metrics first, then train-set metrics, so the two printed dicts
# can be compared for over-/under-fitting (see printed output below).
print(get_regression_metrics(best_model_threshold, df_test_stratified[FEATURES], df_test_stratified[TARGET]))
print(get_regression_metrics(best_model_threshold, df_train_stratified[FEATURES], df_train_stratified[TARGET]))
# Winning hyperparameter combination of the randomized search.
print(random_variance_treshold.best_params_)
# Create dictionaries with training and test results to create parity plots
res_train_threshold = {
'y true': df_train_stratified[TARGET],
'y pred': best_model_threshold.predict(df_train_stratified[FEATURES])
}
res_test_threshold = {
'y true': df_test_stratified[TARGET],
'y pred': best_model_threshold.predict(df_test_stratified[FEATURES])
}
# Signed residual (true - predicted); positive means the model underpredicts.
res_train_threshold['error'] = res_train_threshold['y true'] - res_train_threshold['y pred']
res_test_threshold['error'] = res_test_threshold['y true'] - res_test_threshold['y pred']
# plot it
# NOTE(review): `hv` (holoviews) is not among the imports shown at the top of
# this notebook -- presumably imported in an earlier cell; verify.
hv.extension('bokeh')
# Hex-binned parity plots (train left, test right) with marginal histograms
# along both axes.
hex_train = hv.HexTiles(res_train_threshold, ['y true', 'y pred']).hist(dimension=['y true','y pred'])
hex_test = hv.HexTiles(res_test_threshold, ['y true', 'y pred']).hist(dimension=['y true', 'y pred'])
# Ideal-prediction diagonal y = x over the 0..35 target range.
x = np.linspace(0,35, 1000)
line_curve = hv.Curve((x, x), 'x', 'y').opts(color='red', line_dash='dashed')
hex_train * line_curve + hex_test* line_curve
{'mae': np.float64(0.7141159802880948), 'mse': np.float64(1.15361658746815), 'max_error': np.float64(8.393321844653553)}
{'mae': np.float64(0.6235301543475479), 'mse': np.float64(0.8393054326802294), 'max_error': np.float64(6.925115239352241)}
{'variance_treshold__threshold': 1e-08, 'scaling': MinMaxScaler(), 'krr__gamma': 0.1, 'krr__alpha': 0.001}
$\color{DarkBlue}{\textsf{Short Exercise (optional)}}$
- replace the variance threshold with a model-based feature selection
('feature_selection', SelectFromModel(LinearSVC(penalty="l1"))) or any feature selection method that you would like to try
from sklearn.svm import LinearSVC

# Pipeline: L1-penalised LinearSVC feature selection, then scaling, then KRR.
# NOTE(review): LinearSVC is a *classifier*, which is why the continuous
# target is cast to int in the fit call below -- a Lasso-based selector
# would avoid that hack; confirm this is the intended exercise setup.
pipe_feature_selection = Pipeline(
    [
        # max_iter raised from the default 1000: the previous run emitted a
        # liblinear ConvergenceWarning on every CV fit.
        ('feature_selection', SelectFromModel(LinearSVC(penalty="l1", dual=False, max_iter=10000))),
        ('scaling', StandardScaler()),
        ('krr', KernelRidge(kernel='rbf'))
    ]
)
# Search space: which scaler to use, and the selector's regularisation C
# (smaller C -> stronger L1 penalty -> fewer features kept).
param_grid_feature_selection = {
    'scaling': [StandardScaler(), MinMaxScaler()],
    'feature_selection__estimator__C': [0.001, 0.01, 0.1, 1, 2, 3]
}
# random_state pins the sampled candidates so the search is reproducible.
random_feature_selection = RandomizedSearchCV(pipe_feature_selection, param_distributions=param_grid_feature_selection, n_iter=10,
                                              cv=10, verbose=0, n_jobs=-1, random_state=RANDOM_SEED)
# Fit the pipeline and run the evaluation (int cast needed by the classifier-based selector)
random_feature_selection.fit(df_train_stratified[FEATURES], df_train_stratified[TARGET].astype('int'))
python(24634) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(24635) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(24636) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(24637) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(24638) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(24639) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(24640) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. python(24641) MallocStackLogging: can't turn off malloc stack logging because it was not enabled. /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. 
warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn( /Users/ludovica/miniconda3/envs/ml_molsim/lib/python3.13/site-packages/sklearn/svm/_base.py:1235: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
RandomizedSearchCV(cv=10,
estimator=Pipeline(steps=[('feature_selection',
SelectFromModel(estimator=LinearSVC(dual=False,
penalty='l1'))),
('scaling', StandardScaler()),
('krr',
KernelRidge(kernel='rbf'))]),
n_jobs=-1,
param_distributions={'feature_selection__estimator__C': [0.001,
0.01,
0.1,
1,
2,
3],
'scaling': [StandardScaler(),
MinMaxScaler()]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomizedSearchCV(cv=10,
estimator=Pipeline(steps=[('feature_selection',
SelectFromModel(estimator=LinearSVC(dual=False,
penalty='l1'))),
('scaling', StandardScaler()),
('krr',
KernelRidge(kernel='rbf'))]),
n_jobs=-1,
param_distributions={'feature_selection__estimator__C': [0.001,
0.01,
0.1,
1,
2,
3],
'scaling': [StandardScaler(),
MinMaxScaler()]})Pipeline(steps=[('feature_selection',
SelectFromModel(estimator=LinearSVC(C=0.1, dual=False,
penalty='l1'))),
('scaling', StandardScaler()),
('krr', KernelRidge(kernel='rbf'))])SelectFromModel(estimator=LinearSVC(C=0.1, dual=False, penalty='l1'))
LinearSVC(C=0.1, dual=False, penalty='l1')
LinearSVC(C=0.1, dual=False, penalty='l1')
StandardScaler()
KernelRidge(kernel='rbf')
# Pull out the winning estimator from the randomized search
best_model_feature_selection = random_feature_selection.best_estimator_
# Metrics on the test split first, then the training split, then the
# best hyper-parameter combination.
for subset in (df_test_stratified, df_train_stratified):
    print(get_regression_metrics(best_model_feature_selection, subset[FEATURES], subset[TARGET]))
print(random_feature_selection.best_params_)
# Create dictionaries with truth, predictions and residuals for parity plots.
def _parity_dict(frame):
    # ground truth, model prediction, and their difference for one split
    actual = frame[TARGET]
    estimate = best_model_feature_selection.predict(frame[FEATURES])
    return {'y true': actual, 'y pred': estimate, 'error': actual - estimate}

res_train_feature_selection = _parity_dict(df_train_stratified)
res_test_feature_selection = _parity_dict(df_test_stratified)
# Parity plots for the feature-selection model: hexbin density with marginal
# histograms, plus the ideal y = x diagonal.
hv.extension('bokeh')
axis = np.linspace(0, 35, 1000)
line_curve = hv.Curve((axis, axis), 'x', 'y').opts(color='red', line_dash='dashed')
hex_train = hv.HexTiles(res_train_feature_selection, ['y true', 'y pred']).hist(dimension=['y true', 'y pred'])
hex_test = hv.HexTiles(res_test_feature_selection, ['y true', 'y pred']).hist(dimension=['y true', 'y pred'])
hex_train * line_curve + hex_test * line_curve
{'mae': np.float64(1.1755634545719345), 'mse': np.float64(3.536376764563055), 'max_error': np.float64(20.483787799504952)}
{'mae': np.float64(1.0726098075960928), 'mse': np.float64(2.4771511842973313), 'max_error': np.float64(13.47643641332416)}
{'scaling': StandardScaler(), 'feature_selection__estimator__C': 0.1}
9. Saving the model¶
Now that we have spent so much time optimizing our model, we do not want to lose it.
$\color{DarkBlue}{\textsf{Short Exercise}}$
- use the joblib library to save your model
- make sure you can load it again
# Dump your model
# Local import makes the cell self-contained: the recorded run raised
# "NameError: name 'joblib' is not defined" when this cell was executed
# in a fresh kernel before the import cell.
import joblib

joblib.dump(best_krr_model_scaling_hyp, "krr_model.joblib")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[2], line 2 1 # Dump your model ----> 2 joblib.dump(best_krr_model_scaling_hyp, "krr_model.joblib") NameError: name 'joblib' is not defined
# Try to load it again
# Round-trip check: the deserialised pipeline should behave like the original.
model_loaded = joblib.load("krr_model.joblib")
10. Influence of Regularization¶
$\color{DarkBlue}{\textsf{Short Exercise}}$
- what happens if you set $\alpha$ to a really small or really large value? Why is this the case? Explain what the parameter means using the equation derived in the lectures.
Answer: In Kernel Ridge Regression (KRR), the parameter $\alpha$ controls the regularization strength. It is included in the loss function as follows:
$$ L = \| y_{ML} - y \|^2_2 + \alpha \| w \|^2_2 $$
- If $\alpha$ is large: The regularization term $\alpha \| w \|^2_2$ heavily penalizes large weights $w$, resulting in a simpler model that can help prevent overfitting.
- If $\alpha$ is small: The regularization term applies only a small penalty, allowing the model to learn more complex patterns, which increases the risk of overfitting.
To test this, fix this value in one of your pipelines, retrain the models (re-optimizing the other hyperparameters) and rerun the performance evaluation.
Click here for hints
- Check the derivation for ridge regression and KRR in the notes.
- Also remember the loss landscapes we discussed in the lectures about LASSO.
11. Interpreting the model¶
Now, that our model performs decently, we would like to know which features are mainly responsible for this, i.e. how the model performs its reasoning.
One method to do so is the permutation feature importance technique.
$\color{DarkBlue}{\textsf{Short question}}$
We use both descriptors that encode the pore geometry (density, pore diameters, surface areas) as well as some that describe the chemistry of the MOF (the RACs).
- Would you expect the relative importance of these features to be different for the prediction of gas adsorption at high vs low gas pressure?
$\color{Green}{\textsf{Answer}}$: At high pressure, we anticipate that the geometry of the MOF will have a greater impact on CO₂ uptake. This is because, at elevated pressures, more gas molecules collide with the surface of the MOF, increasing the likelihood of them adhering to it, even if the chemical interactions are not particularly favorable. The article (provided in the hint) highlights that pore topology — features such as pore size and shape—plays a major role under these conditions, as larger adsorption sites enhance CO₂ capture by providing ample surface area for collisions. Conversely, at low pressure, there are fewer gas molecules available to attach to the surface, making favorable chemistry crucial for achieving high uptake. Here, functional groups with strong electrostatic interactions, like amino or hydroxyl groups, become more important for binding CO₂ molecules effectively. Thus, we can conclude that while both geometric and chemical features are important for gas adsorption, their relative significance shifts depending on the pressure conditions
Click here for a hint
- An article from Diego et al. (10.1021/acs.chemmater.8b02257) gives some hints.
$\color{DarkBlue}{\textsf{Short Exercise}}$
- Complete the function
`_calculate_permutation_scores` (which we took from the `sklearn` package), which is needed to calculate the permutation feature importance using the `permutation_importance` function.
def _calculate_permutation_scores(estimator, X, y, col_idx, random_state,
                                  n_repeats, scorer):
    """Score `estimator` with column `col_idx` of `X` shuffled.

    Based on the sklearn implementation.

    estimator: fitted sklearn estimator
    X: pd.DataFrame or np.array
    y: pd.DataFrame or np.array
    col_idx: int, index of the column to permute
    random_state: int, seed for the shuffling RNG
    n_repeats: int, number of independent shuffles to average over
    scorer: callable taking (model, X, y_true)
    """
    rng = check_random_state(random_state)
    # work on a copy so the caller's data is untouched
    X_permuted = X.copy()
    row_order = np.arange(X.shape[0])
    scores = np.zeros(n_repeats)
    for repeat in range(n_repeats):
        # draw a fresh permutation of the row indices
        rng.shuffle(row_order)
        if hasattr(X_permuted, "iloc"):
            # pandas: realign the shuffled column's index before writing it
            # back, otherwise label alignment would undo the shuffle
            shuffled = X_permuted.iloc[row_order, col_idx]
            shuffled.index = X_permuted.index
            X_permuted.iloc[:, col_idx] = shuffled
        else:
            # plain numpy array: positional fancy indexing is enough
            X_permuted[:, col_idx] = X_permuted[row_order, col_idx]
        # score the model on the data with the single permuted column
        scores[repeat] = scorer(estimator, X_permuted, y)
    return scores
Nothing to change in the function below, it just calls the `_calculate_permutation_scores` function you just completed.
def permutation_importance(
    estimator,
    X,
    y,
    scoring="neg_mean_absolute_error",
    n_repeats=5,
    n_jobs=2,
    random_state=None,
):
    """Permutation importance for feature evaluation.

    Each column of `X` is shuffled `n_repeats` times and the estimator is
    re-scored; a feature's importance is the drop in score caused by
    permuting it (baseline score minus permuted score).

    estimator : object
        An estimator that has already been :term:`fitted` and is compatible
        with :term:`scorer`.
    X : ndarray or DataFrame, shape (n_samples, n_features)
        Data on which permutation importance will be computed.
    y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)
        Targets for supervised or `None` for unsupervised.
    scoring : string, callable or None, default="neg_mean_absolute_error"
        Scorer to use. It can be a single
        string (see :ref:`scoring_parameter`) or a callable (see
        :ref:`scoring`). If None, the estimator's default scorer is used.
    n_repeats : int, default=5
        Number of times to permute a feature.
    n_jobs : int or None, default=2
        The number of jobs to use for the computation.
        `None` means 1 unless in a :obj:`joblib.parallel_backend` context.
        `-1` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.
    random_state : int, RandomState instance, or None, default=None
        Pseudo-random number generator to control the permutations of each
        feature. See :term:`random_state`.
    """
    # Validate plain arrays; DataFrames pass through untouched so that the
    # column shuffling in _calculate_permutation_scores can use .iloc.
    if not hasattr(X, "iloc"):
        X = check_array(X, force_all_finite="allow-nan", dtype=None)
    # Precompute random seed from the random state to be used
    # to get a fresh independent RandomState instance for each
    # parallel call to _calculate_permutation_scores, irrespective of
    # the fact that variables are shared or not depending on the active
    # joblib backend (sequential, thread-based or process-based).
    random_state = check_random_state(random_state)
    random_seed = random_state.randint(np.iinfo(np.int32).max + 1)
    # Determine scorer from user options.
    scorer = check_scoring(estimator, scoring=scoring)
    # Score on the unpermuted data: the reference point for all importances.
    baseline_score = scorer(estimator, X, y)
    # Run the permuted evaluations in parallel, one task per column.
    scores = Parallel(n_jobs=n_jobs)(
        delayed(_calculate_permutation_scores)(
            estimator, X, y, col_idx, random_seed, n_repeats, scorer
        )
        for col_idx in range(X.shape[1])
    )
    # Importance = baseline score minus permuted score; `scores` is
    # (n_features, n_repeats), so the subtraction broadcasts row-wise.
    importances = baseline_score - np.array(scores)
    # Return the results as an attribute-accessible dictionary (Bunch).
    return Bunch(
        importances_mean=np.mean(importances, axis=1),
        importances_std=np.std(importances, axis=1),
        importances=importances,
    )
$\color{DarkBlue}{\textsf{Short Exercise}}$
- Use your function to find the five most important features.
$\color{Green}{\textsf{Answer}}$: The five most important features are sum-f-lig-S-2, sum-f-lig-S-0, total_POV_volumetric, total_SA_volumetric, sum-f-lig-I-0
- Which are they? Did you expect this result?
$\color{Green}{\textsf{Answer}}$: This result was unexpected because we were focused on CO$_2$ uptake at high pressure and initially believed that the geometric properties would play a bigger role. However, the findings indicate that chemical features are more significant, as three of the top five important features are related to chemistry.
# `load` is re-imported here; `model_loaded` is presumably the fitted model
# deserialized with it in an earlier cell — confirm against the preceding code.
from joblib import load
# Score every feature with our permutation-importance implementation on the
# stratified training split (features in FEATURES, label column TARGET).
permutation_results = permutation_importance(model_loaded, df_train_stratified[FEATURES], df_train_stratified[TARGET])
import holoviews as hv
hv.extension('bokeh')
# Attach the feature names so holoviews can use them as the key dimension.
permutation_results["features"] = FEATURES
# Bar chart of the mean importance per feature, largest bar first.
bars = hv.Bars(
    permutation_results, "features", ["importances_mean", "importances_std"]
).sort("importances_mean", reverse=True)
# Matching error bars (one standard deviation around each mean), same order.
errors = hv.ErrorBars(
    permutation_results, "features", vdims=["importances_mean", "importances_std"]
).sort("importances_mean", reverse=True)
# Rank all features by mean importance (descending) and keep the top five.
ranked_indices = np.argsort(permutation_results["importances_mean"])[::-1]
top_features = np.array(FEATURES)[ranked_indices[:5]]
print(f"The five most important features are {', '.join(top_features)}.")
# Overlay the bars with their error bars (notebook display expression).
bars * errors
The five most important features are sum-f-lig-S-2, sum-f-lig-S-0, total_POV_volumetric, total_SA_volumetric, sum-f-lig-I-0.
Click here for hints
- To get the top n indices of an array `a`, you can use `np.argsort(a)[-n:]`
- Get the feature names from the `FEATURES` list
- Combined, this might look like `np.array(FEATURES)[np.argsort(a)[-n:]]`
Click here for more information on model interpretation
The permutation feature importance technique is not a silver bullet, e.g. there are issues with correlated features. However, it is likely a better choice than feature importances derived from random forests (such as impurity decrease).
12. Submit your best model to Kaggle¶
Join the Kaggle competition for this course! For this you can:
- try to continue optimizing your KRR model
- try to use a new model (browse the sklearn documentation for ideas or check out xgboost)
The important parts for us here are:
- that you make an attempt to improve your model, discuss this attempt, and use proper metrics to measure potential improvement
- we will not grade you based on how "fancy" your model is or how well it performs but rather on whether you do something reasonable that is well motivated in your discussion
- you do not need to both try a new model and continue optimizing your current model. Doing one of them is, in principle, "enough"
Then use your best model to create a submission.csv with your predictions to join the competition and upload it to the competition site.
# Build the Kaggle submission: predict on the competition feature set and
# write the expected two-column (id, prediction) CSV.
kaggle_data = pd.read_csv('data/features.csv')
# NOTE: `#fillme` is a placeholder — substitute your best fitted estimator,
# e.g. `best_model.predict(kaggle_data[FEATURES])`; the line is not runnable as-is.
kaggle_predictions = #fillme.predict(kaggle_data[FEATURES])
submission = pd.DataFrame({"id": kaggle_data["id"], "prediction": kaggle_predictions})
submission.to_csv("submission.csv", index=False)